Update Win32PlatformMisc.hpp
Fix ARM64 compilation: the MSVC _CountOneBits intrinsics are undefined for ARM64.
Thomas Bishop authored 2 years ago
GitHub committed 2 years ago
107 | 107 | |
108 | 108 | inline static Diligent::Uint32 CountOneBits(Diligent::Uint32 Val) |
109 | 109 | { |
110 | #if _M_ARM | |
111 | auto Bits = _CountOneBits(Val); | |
110 | #if defined _M_ARM || defined _M_ARM64 | |
111 | // MSVC _CountOneBits intrinsics undefined for ARM64 | |
112 | // Cast bits to 8x8 datatype and use VCNT on result | |
113 | const uint8x8_t Vsum = vcnt_u8(vcreate_u8(static_cast<uint64_t>(Val))); | |
114 | // Pairwise sums: 8x8 -> 16x4 -> 32x2 | |
115 | auto Bits = static_cast<Diligent::Uint32>(vget_lane_u32(vpaddl_u16(vpaddl_u8(Vsum)), 0)); | |
112 | 116 | #else |
113 | 117 | auto Bits = __popcnt(Val); |
114 | 118 | #endif |
118 | 122 | |
119 | 123 | inline static Diligent::Uint32 CountOneBits(Diligent::Uint64 Val) |
120 | 124 | { |
121 | #if _M_ARM | |
122 | auto Bits = _CountOneBits64(Val); | |
125 | #if defined _M_ARM || defined _M_ARM64 | |
126 | // Cast bits to 8x8 datatype and use VCNT on result | |
127 | const uint8x8_t Vsum = vcnt_u8(vcreate_u8(Val)); | |
128 | // Pairwise sums: 8x8 -> 16x4 -> 32x2 -> 64x1 | |
129 | auto Bits = static_cast<Diligent::Uint32>(vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(Vsum))), 0)); | |
123 | 130 | #elif _WIN64 |
124 | 131 | auto Bits = __popcnt64(Val); |
125 | 132 | #else |