Skip to content

Commit b9ccd96

Browse files
committed
[Clang] Remove 3-element vector load and store special handling
Clang has long special-cased loads and stores of 3-element vectors: they are performed as 4-element vector operations, with a shufflevector used to extract the three used elements after a load (or to widen the value to four elements before a store). Codegen for odd-sized vectors should now work reasonably well without this workaround, so this patch removes the special handling, as well as the compiler argument `-fpreserve-vec3-type`.
1 parent 6e0fc15 commit b9ccd96

File tree

11 files changed

+26
-72
lines changed

11 files changed

+26
-72
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,9 +405,6 @@ CODEGENOPT(StrictReturn, 1, 1)
405405
/// Whether emit pseudo probes for sample pgo profile collection.
406406
CODEGENOPT(PseudoProbeForProfiling, 1, 0)
407407

408-
/// Whether 3-component vector type is preserved.
409-
CODEGENOPT(PreserveVec3Type, 1, 0)
410-
411408
CODEGENOPT(NoPLT, 1, 0)
412409

413410
/// Whether to emit all vtables

clang/include/clang/Driver/Options.td

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8026,10 +8026,6 @@ def fhlsl_strict_availability : Flag<["-"], "fhlsl-strict-availability">,
80268026
Group<hlsl_Group>,
80278027
MarshallingInfoFlag<LangOpts<"HLSLStrictAvailability">>;
80288028

8029-
def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
8030-
HelpText<"Preserve 3-component vector type">,
8031-
MarshallingInfoFlag<CodeGenOpts<"PreserveVec3Type">>,
8032-
ImpliedByAnyOf<[hlsl.KeyPath]>;
80338029
def fwchar_type_EQ : Joined<["-"], "fwchar-type=">,
80348030
HelpText<"Select underlying type for wchar_t">,
80358031
Values<"char,short,int">,

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1967,23 +1967,6 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
19671967

19681968
return EmitFromMemory(V, Ty);
19691969
}
1970-
1971-
// Handle vectors of size 3 like size 4 for better performance.
1972-
const llvm::Type *EltTy = Addr.getElementType();
1973-
const auto *VTy = cast<llvm::FixedVectorType>(EltTy);
1974-
1975-
if (!CGM.getCodeGenOpts().PreserveVec3Type && VTy->getNumElements() == 3) {
1976-
1977-
llvm::VectorType *vec4Ty =
1978-
llvm::FixedVectorType::get(VTy->getElementType(), 4);
1979-
Address Cast = Addr.withElementType(vec4Ty);
1980-
// Now load value.
1981-
llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
1982-
1983-
// Shuffle vector to get vec3.
1984-
V = Builder.CreateShuffleVector(V, ArrayRef<int>{0, 1, 2}, "extractVec");
1985-
return EmitFromMemory(V, Ty);
1986-
}
19871970
}
19881971

19891972
// Atomic operations have to be done on integral types.
@@ -2111,24 +2094,6 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
21112094
Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV),
21122095
NotKnownNonNull);
21132096

2114-
llvm::Type *SrcTy = Value->getType();
2115-
if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
2116-
auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
2117-
if (!CGM.getCodeGenOpts().PreserveVec3Type) {
2118-
// Handle vec3 special.
2119-
if (VecTy && !ClangVecTy->isExtVectorBoolType() &&
2120-
cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
2121-
// Our source is a vec3, do a shuffle vector to make it a vec4.
2122-
Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},
2123-
"extractVec");
2124-
SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4);
2125-
}
2126-
if (Addr.getElementType() != SrcTy) {
2127-
Addr = Addr.withElementType(SrcTy);
2128-
}
2129-
}
2130-
}
2131-
21322097
Value = EmitToMemory(Value, Ty);
21332098

21342099
LValue AtomicLValue =

clang/test/CodeGen/alignment.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ void test3(packedfloat3 *p) {
4545
*p = (packedfloat3) { 3.2f, 2.3f, 0.1f };
4646
}
4747
// CHECK: @test3(
48-
// CHECK: store <4 x float> {{.*}}, align 4
48+
// CHECK: store <3 x float> {{.*}}, align 4
4949
// CHECK: ret void
5050

5151

clang/test/CodeGen/arm-abi-vector.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ double varargs_vec_3s(int fixed, ...) {
194194
// APCS-GNU: [[VAR:%.*]] = alloca <3 x i16>, align 8
195195
// APCS-GNU: [[AP:%.*]] = load ptr,
196196
// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 8
197-
// APCS-GNU: [[VEC:%.*]] = load <4 x i16>, ptr [[AP]], align 4
197+
// APCS-GNU: [[VEC:%.*]] = load <3 x i16>, ptr [[AP]], align 4
198198
// ANDROID: varargs_vec_3s
199199
// ANDROID: alloca <3 x i16>, align 8
200200
// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)

clang/test/CodeGen/arm64-abi-vector.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,8 +415,7 @@ double fixed_5i(__int5 *in) {
415415

416416
__attribute__((noinline)) double args_vec_3d(int fixed, __double3 c3) {
417417
// CHECK: args_vec_3d
418-
// CHECK: [[LOAD:%.*]] = load <4 x double>, ptr {{%.*}}
419-
// CHECK: shufflevector <4 x double> [[LOAD]], <4 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
418+
// CHECK: [[LOAD:%.*]] = load <3 x double>, ptr {{%.*}}
420419
double sum = fixed;
421420
sum = sum + c3.x + c3.y;
422421
return sum;

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
343343
void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
344344
long long int i1, long long int i2, short si,
345345
_BitInt(31) bi1, _BitInt(31) bi2) {
346-
346+
347347

348348
// CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
349349
// CHECK-NEXT: call i64 @llvm.bitreverse.i64(i64 [[I1]])
@@ -839,13 +839,10 @@ void test_builtin_elementwise_fma(float f32, double f64,
839839

840840

841841
// FIXME: Are we really still doing the 3 vector load workaround
842-
// CHECK: [[V3F64_LOAD_0:%.+]] = load <4 x double>, ptr %v3f64.addr
843-
// CHECK-NEXT: [[V3F64_0:%.+]] = shufflevector
844-
// CHECK-NEXT: [[V3F64_LOAD_1:%.+]] = load <4 x double>, ptr %v3f64.addr
845-
// CHECK-NEXT: [[V3F64_1:%.+]] = shufflevector
846-
// CHECK-NEXT: [[V3F64_LOAD_2:%.+]] = load <4 x double>, ptr %v3f64.addr
847-
// CHECK-NEXT: [[V3F64_2:%.+]] = shufflevector
848-
// CHECK-NEXT: call <3 x double> @llvm.fma.v3f64(<3 x double> [[V3F64_0]], <3 x double> [[V3F64_1]], <3 x double> [[V3F64_2]])
842+
// CHECK: [[V3F64_LOAD_0:%.+]] = load <3 x double>, ptr %v3f64.addr
843+
// CHECK-NEXT: [[V3F64_LOAD_1:%.+]] = load <3 x double>, ptr %v3f64.addr
844+
// CHECK-NEXT: [[V3F64_LOAD_2:%.+]] = load <3 x double>, ptr %v3f64.addr
845+
// CHECK-NEXT: call <3 x double> @llvm.fma.v3f64(<3 x double> [[V3F64_LOAD_0]], <3 x double> [[V3F64_LOAD_1]], <3 x double> [[V3F64_LOAD_2]])
849846
v3f64 = __builtin_elementwise_fma(v3f64, v3f64, v3f64);
850847

851848
// CHECK: [[F64_0:%.+]] = load double, ptr %f64.addr

clang/test/CodeGenOpenCL/amdgpu-alignment.cl

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -94,43 +94,43 @@ typedef double __attribute__((ext_vector_type(16))) double16;
9494
// CHECK-LABEL: @local_memory_alignment_global(
9595
// CHECK: store volatile i8 0, ptr addrspace(3) @local_memory_alignment_global.lds_i8, align 1
9696
// CHECK: store volatile <2 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i8, align 2
97-
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i8, align 4
97+
// CHECK: store volatile <3 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i8, align 4
9898
// CHECK: store volatile <4 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i8, align 4
9999
// CHECK: store volatile <8 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i8, align 8
100100
// CHECK: store volatile <16 x i8> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i8, align 16
101101
// CHECK: store volatile i16 0, ptr addrspace(3) @local_memory_alignment_global.lds_i16, align 2
102102
// CHECK: store volatile <2 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i16, align 4
103-
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i16, align 8
103+
// CHECK: store volatile <3 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i16, align 8
104104
// CHECK: store volatile <4 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i16, align 8
105105
// CHECK: store volatile <8 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i16, align 16
106106
// CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i16, align 32
107107
// CHECK: store volatile i32 0, ptr addrspace(3) @local_memory_alignment_global.lds_i32, align 4
108108
// CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i32, align 8
109-
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16
109+
// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i32, align 16
110110
// CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i32, align 16
111111
// CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i32, align 32
112112
// CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i32, align 64
113113
// CHECK: store volatile i64 0, ptr addrspace(3) @local_memory_alignment_global.lds_i64, align 8
114114
// CHECK: store volatile <2 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2i64, align 16
115-
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3i64, align 32
115+
// CHECK: store volatile <3 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3i64, align 32
116116
// CHECK: store volatile <4 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4i64, align 32
117117
// CHECK: store volatile <8 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8i64, align 64
118118
// CHECK: store volatile <16 x i64> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16i64, align 128
119119
// CHECK: store volatile half 0xH0000, ptr addrspace(3) @local_memory_alignment_global.lds_f16, align 2
120120
// CHECK: store volatile <2 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f16, align 4
121-
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f16, align 8
121+
// CHECK: store volatile <3 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f16, align 8
122122
// CHECK: store volatile <4 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f16, align 8
123123
// CHECK: store volatile <8 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f16, align 16
124124
// CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f16, align 32
125125
// CHECK: store volatile float 0.000000e+00, ptr addrspace(3) @local_memory_alignment_global.lds_f32, align 4
126126
// CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f32, align 8
127-
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16
127+
// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f32, align 16
128128
// CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f32, align 16
129129
// CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f32, align 32
130130
// CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f32, align 64
131131
// CHECK: store volatile double 0.000000e+00, ptr addrspace(3) @local_memory_alignment_global.lds_f64, align 8
132132
// CHECK: store volatile <2 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v2f64, align 16
133-
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, ptr addrspace(3) @local_memory_alignment_global.lds_v3f64, align 32
133+
// CHECK: store volatile <3 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v3f64, align 32
134134
// CHECK: store volatile <4 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v4f64, align 32
135135
// CHECK: store volatile <8 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v8f64, align 64
136136
// CHECK: store volatile <16 x double> zeroinitializer, ptr addrspace(3) @local_memory_alignment_global.lds_v16f64, align 128
@@ -381,43 +381,43 @@ kernel void local_memory_alignment_arg(
381381

382382
// CHECK: store volatile i8 0, ptr addrspace(5) %arraydecay, align 1
383383
// CHECK: store volatile <2 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2
384-
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
384+
// CHECK: store volatile <3 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
385385
// CHECK: store volatile <4 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
386386
// CHECK: store volatile <8 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
387387
// CHECK: store volatile <16 x i8> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
388388
// CHECK: store volatile i16 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2
389389
// CHECK: store volatile <2 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
390-
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
390+
// CHECK: store volatile <3 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
391391
// CHECK: store volatile <4 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
392392
// CHECK: store volatile <8 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
393393
// CHECK: store volatile <16 x i16> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
394394
// CHECK: store volatile i32 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
395395
// CHECK: store volatile <2 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
396-
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
396+
// CHECK: store volatile <3 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
397397
// CHECK: store volatile <4 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
398398
// CHECK: store volatile <8 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
399399
// CHECK: store volatile <16 x i32> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
400400
// CHECK: store volatile i64 0, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
401401
// CHECK: store volatile <2 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
402-
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
402+
// CHECK: store volatile <3 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
403403
// CHECK: store volatile <4 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
404404
// CHECK: store volatile <8 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
405405
// CHECK: store volatile <16 x i64> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 128
406406
// CHECK: store volatile half 0xH0000, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 2
407407
// CHECK: store volatile <2 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
408-
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
408+
// CHECK: store volatile <3 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
409409
// CHECK: store volatile <4 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
410410
// CHECK: store volatile <8 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
411411
// CHECK: store volatile <16 x half> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
412412
// CHECK: store volatile float 0.000000e+00, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 4
413413
// CHECK: store volatile <2 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
414-
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
414+
// CHECK: store volatile <3 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
415415
// CHECK: store volatile <4 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
416416
// CHECK: store volatile <8 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
417417
// CHECK: store volatile <16 x float> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
418418
// CHECK: store volatile double 0.000000e+00, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 8
419419
// CHECK: store volatile <2 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 16
420-
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
420+
// CHECK: store volatile <3 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
421421
// CHECK: store volatile <4 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 32
422422
// CHECK: store volatile <8 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 64
423423
// CHECK: store volatile <16 x double> zeroinitializer, ptr addrspace(5) %arraydecay{{[0-9]+}}, align 128

clang/test/CodeGenOpenCL/preserve_vec3.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type | FileCheck %s
1+
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
22

33
typedef char char3 __attribute__((ext_vector_type(3)));
44
typedef char char8 __attribute__((ext_vector_type(8)));

clang/test/CodeGenOpenCL/vectorLoadStore.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ typedef float float4 __attribute((ext_vector_type(4)));
88
// Check for optimized vec3 load/store which treats vec3 as vec4.
99
void foo(char3 *P, char3 *Q) {
1010
*P = *Q;
11-
// CHECK: %{{.*}} = shufflevector <4 x i8> %{{.*}}, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
11+
// CHECK: %{{.*}} = load <3 x i8>, ptr addrspace(4) %{{.*}}, align 4
12+
// CHECK: store <3 x i8> %{{.*}}, ptr addrspace(4) %{{.*}}, align 4
1213
}
1314

1415
// CHECK: define{{.*}} spir_func void @alignment()

clang/test/CodeGenOpenCL/vector_literals.cl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ void vector_literals_valid() {
4242
//CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> <i32 3, i32 3, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
4343
int4 a_2_2 = (int4)((int2)(1, 2), (int2)(3));
4444

45-
//CHECK: store <4 x i32> <i32 2, i32 3, i32 4, i32 undef>, ptr
46-
//CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
45+
//CHECK: store <3 x i32> <i32 2, i32 3, i32 4>, ptr
4746
//CHECK: shufflevector <3 x i32> %{{.+}}, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4847
//CHECK: shufflevector <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
4948
int4 a_1_3 = (int4)(1, (int3)(2, 3, 4));

0 commit comments

Comments
 (0)