Skip to content

Commit af029d3

Browse files
[libc][reland] Fix builtin definition for memory functions
The memory functions are highly performance-sensitive and use builtins where possible, but they also need to define those function names when the builtins don't exist, to avoid compilation errors. Previously, all of those redefinitions were behind the SSE2 flag for x86, which caused errors on CPUs that supported SSE2 but not AVX512. This patch splits the redefinitions out per CPU extension to avoid errors on such CPUs. Reviewed By: gchatelet. Differential Revision: https://reviews.llvm.org/D138163
1 parent 9e595e9 commit af029d3

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
set(ALL_CPU_FEATURES "")
77

88
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
9-
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F FMA)
9+
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F AVX512BW FMA)
1010
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
1111
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
1212
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)

libc/src/string/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
355355
add_bcmp(bcmp_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
356356
add_bcmp(bcmp_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
357357
add_bcmp(bcmp_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
358-
add_bcmp(bcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
358+
add_bcmp(bcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW)
359359
add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
360360
add_bcmp(bcmp)
361361
else()
@@ -409,7 +409,7 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
409409
add_memcmp(memcmp_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
410410
add_memcmp(memcmp_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
411411
add_memcmp(memcmp_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
412-
add_memcmp(memcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
412+
add_memcmp(memcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW)
413413
add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
414414
add_memcmp(memcmp)
415415
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})

libc/src/string/memory_utils/op_x86.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,22 @@
2020
#include "src/string/memory_utils/op_builtin.h"
2121
#include "src/string/memory_utils/op_generic.h"
2222

23-
#ifdef __SSE2__
23+
#if defined(__AVX512BW__) || defined(__AVX512F__) || defined(__AVX2__) || \
24+
defined(__SSE2__)
2425
#include <immintrin.h>
25-
#else
26+
#endif
27+
2628
// Define fake functions to prevent the compiler from failing on undefined
27-
// functions in case SSE2 is not present.
29+
// functions in case the CPU extension is not present.
30+
#if !defined(__AVX512BW__) && (defined(_MSC_VER) || defined(__SCE__))
2831
#define _mm512_cmpneq_epi8_mask(A, B) 0
29-
#define _mm_movemask_epi8(A) 0
32+
#endif
33+
#if !defined(__AVX2__) && (defined(_MSC_VER) || defined(__SCE__))
3034
#define _mm256_movemask_epi8(A) 0
31-
#endif // __SSE2__
35+
#endif
36+
#if !defined(__SSE2__) && (defined(_MSC_VER) || defined(__SCE__))
37+
#define _mm_movemask_epi8(A) 0
38+
#endif
3239

3340
namespace __llvm_libc::x86 {
3441

0 commit comments

Comments
 (0)