git.s-ol.nu forks/DiligentCore / 10cf2fa
Update Win32PlatformMisc.hpp: Fix ARM64 compilation. The MSVC _CountOneBits intrinsics are undefined for ARM64. Thomas Bishop authored 2 years ago; GitHub committed 2 years ago
1 changed file(s) with 11 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
107107
108108 inline static Diligent::Uint32 CountOneBits(Diligent::Uint32 Val)
109109 {
110 #if _M_ARM
111 auto Bits = _CountOneBits(Val);
110 #if defined _M_ARM || defined _M_ARM64
111 // MSVC _CountOneBits intrinsics undefined for ARM64
112 // Cast bits to 8x8 datatype and use VCNT on result
113 const uint8x8_t Vsum = vcnt_u8(vcreate_u8(static_cast<uint64_t>(Val)));
114 // Pairwise sums: 8x8 -> 16x4 -> 32x2
115 auto Bits = static_cast<Diligent::Uint32>(vget_lane_u32(vpaddl_u16(vpaddl_u8(Vsum)), 0));
112116 #else
113117 auto Bits = __popcnt(Val);
114118 #endif
118122
119123 inline static Diligent::Uint32 CountOneBits(Diligent::Uint64 Val)
120124 {
121 #if _M_ARM
122 auto Bits = _CountOneBits64(Val);
125 #if defined _M_ARM || defined _M_ARM64
126 // Cast bits to 8x8 datatype and use VCNT on result
127 const uint8x8_t Vsum = vcnt_u8(vcreate_u8(Val));
128 // Pairwise sums: 8x8 -> 16x4 -> 32x2 -> 64x1
129 auto Bits = static_cast<Diligent::Uint32>(vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(Vsum))), 0));
123130 #elif _WIN64
124131 auto Bits = __popcnt64(Val);
125132 #else