diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index ea2da23a95278..def11f88c4854 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -76,6 +76,8 @@ struct { #elif defined(__linux__) && __has_include() #include "aarch64/fmv/mrs.inc" #include "aarch64/fmv/getauxval.inc" +#elif defined(_WIN32) +#include "aarch64/fmv/windows.inc" #else #include "aarch64/fmv/unimplemented.inc" #endif diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc new file mode 100644 index 0000000000000..2ca18242fba3e --- /dev/null +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -0,0 +1,85 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include +#include + +#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 +#endif +#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44 +#endif +#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 +#endif +#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46 +#endif +#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47 +#endif +#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50 +#endif +#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55 +#endif +#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56 +#endif +#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57 +#endif +#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58 +#endif +#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE +#define 
PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59 +#endif + +void __init_cpu_features_resolver(unsigned long hwcap, + const __ifunc_arg_t *arg) {} /* No-op: hwcap and arg are unused. */ + +void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + +#define setCPUFeature(F) features |= 1ULL << F + + uint64_t features = 0; + + setCPUFeature(FEAT_INIT); + setCPUFeature(FEAT_FP); + + // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { + setCPUFeature(FEAT_SHA2); + setCPUFeature(FEAT_PMULL); + } + + static const struct ProcessFeatureToFeatMap_t { + int WinApiFeature; + enum CPUFeatures CPUFeature; + } FeatMap[] = { + {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC}, + {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE}, + {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD}, + {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT}, + {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC}, + {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE}, + {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2}, + {PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE, FEAT_SVE_PMULL128}, + {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3}, + {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4}, + {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM}, + {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM}, + // There is no I8MM flag, but when SVE_I8MM is available, I8MM is too. 
+ {PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM}, + }; + + for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I) + if (IsProcessorFeaturePresent(FeatMap[I].WinApiFeature)) + setCPUFeature(FeatMap[I].CPUFeature); + + __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED); +} diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h index 924ca89cf60f5..3bc4e63c4f25a 100644 --- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h +++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h @@ -31,7 +31,15 @@ // We're choosing init priority 90 to force our constructors to run before any // constructors in the end user application (starting at priority 101). This // value matches the libgcc choice for the same functions. -#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) +#ifdef _WIN32 +// The constructor that replaces the ifunc currently runs with priority 10, +// see the LowerIFuncPass. The FMV resolver depends on the CPU features, so +// set the priority to 9. +#define CONSTRUCTOR_PRIORITY 9 +#else +#define CONSTRUCTOR_PRIORITY 90 +#endif +#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIORITY))) #else // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that // this runs during initialization.