-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[Clang] Remove 3-element vector load and store special handling #104661
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) ChangesClang uses a long-time special handling of the case where 3 element vector loads This patch removes this special handling, as well as the compiler argument Patch is 20.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104661.diff 11 Files Affected:
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 09e892d6d4defe..cdd7b60caaf28d 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -405,9 +405,6 @@ CODEGENOPT(StrictReturn, 1, 1)
/// Whether emit pseudo probes for sample pgo profile collection.
CODEGENOPT(PseudoProbeForProfiling, 1, 0)
-/// Whether 3-component vector type is preserved.
-CODEGENOPT(PreserveVec3Type, 1, 0)
-
CODEGENOPT(NoPLT, 1, 0)
/// Whether to emit all vtables
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index cfd9e595c55178..f6486ebd6811b6 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8026,10 +8026,6 @@ def fhlsl_strict_availability : Flag<["-"], "fhlsl-strict-availability">,
Group<hlsl_Group>,
MarshallingInfoFlag<LangOpts<"HLSLStrictAvailability">>;
-def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
- HelpText<"Preserve 3-component vector type">,
- MarshallingInfoFlag<CodeGenOpts<"PreserveVec3Type">>,
- ImpliedByAnyOf<[hlsl.KeyPath]>;
def fwchar_type_EQ : Joined<["-"], "fwchar-type=">,
HelpText<"Select underlying type for wchar_t">,
Values<"char,short,int">,
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 48d9a3b8a5acb3..1027fcb4130cf1 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1967,23 +1967,6 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
return EmitFromMemory(V, Ty);
}
-
- // Handle vectors of size 3 like size 4 for better performance.
- const llvm::Type *EltTy = Addr.getElementType();
- const auto *VTy = cast<llvm::FixedVectorType>(EltTy);
-
- if (!CGM.getCodeGenOpts().PreserveVec3Type && VTy->getNumElements() == 3) {
-
- llvm::VectorType *vec4Ty =
- llvm::FixedVectorType::get(VTy->getElementType(), 4);
- Address Cast = Addr.withElementType(vec4Ty);
- // Now load value.
- llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
-
- // Shuffle vector to get vec3.
- V = Builder.CreateShuffleVector(V, ArrayRef<int>{0, 1, 2}, "extractVec");
- return EmitFromMemory(V, Ty);
- }
}
// Atomic operations have to be done on integral types.
@@ -2111,24 +2094,6 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV),
NotKnownNonNull);
- llvm::Type *SrcTy = Value->getType();
- if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
- auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
- if (!CGM.getCodeGenOpts().PreserveVec3Type) {
- // Handle vec3 special.
- if (VecTy && !ClangVecTy->isExtVectorBoolType() &&
- cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
- // Our source is a vec3, do a shuffle vector to make it a vec4.
- Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},
- "extractVec");
- SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4);
- }
- if (Addr.getElementType() != SrcTy) {
- Addr = Addr.withElementType(SrcTy);
- }
- }
- }
-
Value = EmitToMemory(Value, Ty);
LValue AtomicLValue =
diff --git a/clang/test/CodeGen/alignment.c b/clang/test/CodeGen/alignment.c
index b70297a3898979..b3bfce3c81f90e 100644
--- a/clang/test/CodeGen/alignment.c
+++ b/clang/test/CodeGen/alignment.c
@@ -45,7 +45,7 @@ void test3(packedfloat3 *p) {
*p = (packedfloat3) { 3.2f, 2.3f, 0.1f };
}
// CHECK: @test3(
-// CHECK: store <4 x float> {{.*}}, align 4
+// CHECK: store <3 x float> {{.*}}, align 4
// CHECK: ret void
diff --git a/clang/test/CodeGen/arm-abi-vector.c b/clang/test/CodeGen/arm-abi-vector.c
index c2a8902007980b..7f0cc4bcb0cd85 100644
--- a/clang/test/CodeGen/arm-abi-vector.c
+++ b/clang/test/CodeGen/arm-abi-vector.c
@@ -194,7 +194,7 @@ double varargs_vec_3s(int fixed, ...) {
// APCS-GNU: [[VAR:%.*]] = alloca <3 x i16>, align 8
// APCS-GNU: [[AP:%.*]] = load ptr,
// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 8
-// APCS-GNU: [[VEC:%.*]] = load <4 x i16>, ptr [[AP]], align 4
+// APCS-GNU: [[VEC:%.*]] = load <3 x i16>, ptr [[AP]], align 4
// ANDROID: varargs_vec_3s
// ANDROID: alloca <3 x i16>, align 8
// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
diff --git a/clang/test/CodeGen/arm64-abi-vector.c b/clang/test/CodeGen/arm64-abi-vector.c
index 81e42315c883bd..17dc3da8b8fbc2 100644
--- a/clang/test/CodeGen/arm64-abi-vector.c
+++ b/clang/test/CodeGen/arm64-abi-vector.c
@@ -415,8 +415,7 @@ double fixed_5i(__int5 *in) {
__attribute__((noinline)) double args_vec_3d(int fixed, __double3 c3) {
// CHECK: args_vec_3d
- // CHECK: [[LOAD:%.*]] = load <4 x double>, ptr {{%.*}}
- // CHECK: shufflevector <4 x double> [[LOAD]], <4 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+ // CHECK: [[LOAD:%.*]] = load <3 x double>, ptr {{%.*}}
double sum = fixed;
sum = sum + c3.x + c3.y;
return sum;
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 8fb52992c0fe68..fe7942d1acab7d 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -343,7 +343,7 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
long long int i1, long long int i2, short si,
_BitInt(31) bi1, _BitInt(31) bi2) {
-
+
// CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
// CHECK-NEXT: call i64 @llvm.bitreverse.i64(i64 [[I1]])
@@ -839,13 +839,10 @@ void test_builtin_elementwise_fma(float f32, double f64,
// FIXME: Are we really still doing the 3 vector load workaround
- // CHECK: [[V3F64_LOAD_0:%.+]] = load <4 x double>, ptr %v3f64.addr
- // CHECK-NEXT: [[V3F64_0:%.+]] = shufflevector
- // CHECK-NEXT: [[V3F64_LOAD_1:%.+]] = load <4 x double>, ptr %v3f64.addr
- // CHECK-NEXT: [[V3F64_1:%.+]] = shufflevector
- // CHECK-NEXT: [[V3F64_LOAD_2:%.+]] = load <4 x double>, ptr %v3f64.addr
- // CHECK-NEXT: [[V3F64_2:%.+]] = shufflevector
- // CHECK-NEXT: call <3 x double> @llvm.fma.v3f64(<3 x double> [[V3F64_0]], <3 x double> [[V3F64_1]], <3 x double> [[V3F64_2]])
+ // CHECK: [[V3F64_LOAD_0:%.+]] = load <3 x double>, ptr %v3f64.addr
+ // CHECK-NEXT: [[V3F64_LOAD_1:%.+]] = load <3 x double>, ptr %v3f64.addr
+ // CHECK-NEXT: [[V3F64_LOAD_2:%.+]] = load <3 x double>, ptr %v3f64.addr
+ // CHECK-NEXT: call <3 x double> @llvm.fma.v3f64(<3 x double> [[V3F64_LOAD_0]], <3 x double> [[V3F64_LOAD_1]], <3 x double> [[V3F64_LOAD_2]])
v3f64 = __builtin_elementwise_fma(v3f64, v3f64, v3f64);
// CHECK: [[F64_0:%.+]] = load double, ptr %f64.addr
diff --git a/clang/test/CodeGenOpenCL/amdgpu-alignment.cl b/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
index 8f57713fe1f041..52a5d01588875f 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -94,43 +94,43 @@ typedef double __attribute__((ext_vector_type(16))) double16;
// CHECK-LABEL: @local_memory_alignment_global(
// CHECK: store volatile i8 0, ptr addrspace(3) @local_memory_alignment_global.lds_i8, align 1
// CHECK: store volatile <2 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i8, align 2
-// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i8, align 4
+// CHECK: store volatile <3 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i8, align 4
// CHECK: store volatile <4 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i8, align 4
// CHECK: store volatile <8 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i8, align 8
// CHECK: store volatile <16 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i8, align 16
// CHECK: store volatile i16 0, ptr addrspace(3) @local_memory_alignment_global.lds_i16, align 2
// CHECK: store volatile <2 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i16, align 4
-// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i16, align 8
+// CHECK: store volatile <3 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i16, align 8
// CHECK: store volatile <4 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i16, align 8
// CHECK: store volatile <8 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i16, align 16
// CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i16, align 32
// CHECK: store volatile i32 0, ptr addrspace(3) @local_memory_alignment_global.lds_i32, align 4
// CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i32, align 8
-// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16
+// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16
// CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i32, align 16
// CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i32, align 32
// CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i32, align 64
// CHECK: store volatile i64 0, ptr addrspace(3) @local_memory_alignment_global.lds_i64, align 8
// CHECK: store volatile <2 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i64, align 16
-// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i64, align 32
+// CHECK: store volatile <3 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i64, align 32
// CHECK: store volatile <4 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i64, align 32
// CHECK: store volatile <8 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i64, align 64
// CHECK: store volatile <16 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i64, align 128
// CHECK: store volatile half 0xH0000, ptr addrspace(3) @local_memory_alignment_global.lds_f16, align 2
// CHECK: store volatile <2 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f16, align 4
-// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f16, align 8
+// CHECK: store volatile <3 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f16, align 8
// CHECK: store volatile <4 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f16, align 8
// CHECK: store volatile <8 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f16, align 16
// CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f16, align 32
// CHECK: store volatile float 0.000000e+00, ptr addrspace(3) @local_memory_alignment_global.lds_f32, align 4
// CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f32, align 8
-// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16
+// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16
// CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f32, align 16
// CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f32, align 32
// CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f32, align 64
// CHECK: store volatile double 0.000000e+00, ptr addrspace(3) @local_memory_alignment_global.lds_f64, align 8
// CHECK: store volatile <2 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f64, align 16
-// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f64, align 32
+// CHECK: store volatile <3 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f64, align 32
// CHECK: store volatile <4 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f64, align 32
// CHECK: store volatile <8 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f64, align 64
// CHECK: store volatile <16 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f64, align 128
@@ -381,43 +381,43 @@ kernel void local_memory_alignment_arg(
// CHECK: store volatile i8 0, ptr addrspace(5) %arraydecay, align 1
// CHECK: store volatile <2 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2
-// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <3 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <4 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <8 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <16 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile i16 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <3 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile i32 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile i64 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <3 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <4 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 128
// CHECK: store volatile half 0xH0000, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <3 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile float 0.000000e+00, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile double 0.000000e+00, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <3 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <4 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 128
diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index 19f0cdff60a9d6..8c89e1651edc11 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
typedef char char3 __attribute__((ext_vector_type(3)));
typedef char char8 __attribute__((ext_vector_type(8)));
diff --git a/clang/test/CodeGenOpenCL/vectorLoadStore.cl b/clang/test/CodeGenOpenCL/vectorLoadStore.cl
index 7f8992929b4890..97f694788f7533 100644
--- a/clang/test/CodeGenOpenCL/vectorLoadStore.cl
+++ b/clang/test/CodeGenOpenCL/vectorLoadStore.cl
@@ -8,7 +8,8 @@ typedef float float4 __attribute((ext_vector_type(4)));
// Check for optimized vec3 load/store which treats vec3 as vec4.
void foo(char3 *P, char3 *Q) {
*P = *Q;
- // CHECK: %{{.*}} = shufflevector <4 x i8> %{{.*}}, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+ // CHECK: %{{.*}} = load <3 x i8>, ptr addrspace(4) %{{.*}}, align 4
+ // CHECK: store <3 x i8> %{{.*}}, ptr addrspace(4) %{{.*}}, align 4
}
// CHECK: define{{.*}} spir_func void @alignment()
diff --git a/clang/test/CodeGenOpenCL/vector_literals.cl b/clang/test/CodeGenOpenCL/vector_literals.cl
index f69f339ca99aaf..efe240ade111ab 100644
--- a/clang/test/CodeGenOpenCL/vector_literals.cl
+++ b/clang/test/CodeGenOpenCL/vector_literals.cl
@@ -42,8 +42,7 @@ void vector_literals_valid() {
//CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> <i32 3, i32 3, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
int4 a_2_2 = (int4)((int2)(1, 2), (in...
[truncated]
|
b9ccd96
to
c422624
Compare
e4b3028
to
69d4919
Compare
gentle ping |
9e23538
to
f01858b
Compare
f01858b
to
9dfb68d
Compare
9dfb68d
to
76db8c6
Compare
76db8c6
to
e4759ca
Compare
e4759ca
to
ac2bd8c
Compare
ac2bd8c
to
06bb0e1
Compare
bump bump |
06bb0e1
to
6899935
Compare
Clang uses a long-time special handling of the case where 3 element vector loads and stores are performed as 4 element, and then a shufflevector is used to extract the used elements. Odd sized vector codegen should now work reasonably well. This patch removes this special handling, as well as the compiler argument `-fpreserve-vec3-type`.
6899935
to
5729891
Compare
Co-authored-by: Matt Arsenault <[email protected]>
Co-authored-by: Matt Arsenault <[email protected]>
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/2/builds/15293 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/10790 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/197/builds/934 Here is the relevant piece of the build log for the reference
|
The compile error was resolved by 59dffce. |
Clang uses a long-time special handling of the case where 3 element vector loads and stores are performed as 4 element, and then a shufflevector is used to extract the used elements. Odd sized vector codegen should now work reasonably well.
This patch removes the compiler argument
-fpreserve-vec3-type
and adds a target hook to determine if the special handling of vector type is needed.