Skip to content

Commit b3f7ac9

Browse files
LebedevRI authored and arichardson committed
[X86] LowerBUILD_VECTOR(): fix all-UNDEF detection
The original check tried to avoid inspecting `UndefMask` directly and to deduce the all-undef case by simpler means, but checking `NonZeroMask` alone does not, e.g., account for `ZeroMask`.

Fixes llvm/llvm-project#60168
2 parents 867d517 + 1eecf03 commit b3f7ac9

File tree

2 files changed

+77
-6
lines changed

2 files changed

+77
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11167,19 +11167,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
11167 11167
}
11168 11168
}
11169 11169

11170-
// All undef vector. Return an UNDEF. All zero vectors were handled above.
11171-
unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
11172-
if (NonZeroMask == 0 && NumFrozenUndefElts != NumElems) {
11173-
assert(UndefMask.isAllOnes() && "Fully undef mask expected");
11170+
// All undef vector. Return an UNDEF.
11171+
if (UndefMask.isAllOnes())
11174 11172
return DAG.getUNDEF(VT);
11175-
}
11176 11173

11177 11174
// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
11178 11175
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
11179 11176
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
11180 11177
// and blend the FREEZE-UNDEF operands back in.
11181 11178
// FIXME: is this worthwhile even for a single FREEZE-UNDEF operand?
11182-
if (NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
11179+
if (unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
11180+
NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
11183 11181
SmallVector<int, 16> BlendMask(NumElems, -1);
11184 11182
SmallVector<SDValue, 16> Elts(NumElems, DAG.getUNDEF(OpEltVT));
11185 11183
for (unsigned i = 0; i < NumElems; ++i) {

llvm/test/CodeGen/X86/build-vector-128.ll

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -552,3 +552,76 @@ define <4 x float> @PR37502(float %x, float %y) {
552 552
ret <4 x float> %i3
553 553
}
554 554

555+
define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
556+
; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
557+
; SSE2-32: # %bb.0:
558+
; SSE2-32-NEXT: movl {{[0-9]+}}(%esp), %eax
559+
; SSE2-32-NEXT: movd %eax, %xmm1
560+
; SSE2-32-NEXT: xorps %xmm2, %xmm2
561+
; SSE2-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
562+
; SSE2-32-NEXT: paddd %xmm0, %xmm0
563+
; SSE2-32-NEXT: psubd %xmm0, %xmm2
564+
; SSE2-32-NEXT: movdqa %xmm2, %xmm0
565+
; SSE2-32-NEXT: psrad $31, %xmm0
566+
; SSE2-32-NEXT: pxor %xmm0, %xmm2
567+
; SSE2-32-NEXT: psubd %xmm0, %xmm2
568+
; SSE2-32-NEXT: movq %xmm2, (%eax)
569+
; SSE2-32-NEXT: retl
570+
;
571+
; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
572+
; SSE2-64: # %bb.0:
573+
; SSE2-64-NEXT: movd %eax, %xmm1
574+
; SSE2-64-NEXT: xorps %xmm2, %xmm2
575+
; SSE2-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
576+
; SSE2-64-NEXT: paddd %xmm0, %xmm0
577+
; SSE2-64-NEXT: psubd %xmm0, %xmm2
578+
; SSE2-64-NEXT: movdqa %xmm2, %xmm0
579+
; SSE2-64-NEXT: psrad $31, %xmm0
580+
; SSE2-64-NEXT: pxor %xmm0, %xmm2
581+
; SSE2-64-NEXT: psubd %xmm0, %xmm2
582+
; SSE2-64-NEXT: movq %xmm2, (%rdi)
583+
; SSE2-64-NEXT: retq
584+
;
585+
; SSE41-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
586+
; SSE41-32: # %bb.0:
587+
; SSE41-32-NEXT: movl {{[0-9]+}}(%esp), %eax
588+
; SSE41-32-NEXT: paddd %xmm0, %xmm0
589+
; SSE41-32-NEXT: pxor %xmm1, %xmm1
590+
; SSE41-32-NEXT: psubd %xmm0, %xmm1
591+
; SSE41-32-NEXT: pabsd %xmm1, %xmm0
592+
; SSE41-32-NEXT: movq %xmm0, (%eax)
593+
; SSE41-32-NEXT: retl
594+
;
595+
; SSE41-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
596+
; SSE41-64: # %bb.0:
597+
; SSE41-64-NEXT: paddd %xmm0, %xmm0
598+
; SSE41-64-NEXT: pxor %xmm1, %xmm1
599+
; SSE41-64-NEXT: psubd %xmm0, %xmm1
600+
; SSE41-64-NEXT: pabsd %xmm1, %xmm0
601+
; SSE41-64-NEXT: movq %xmm0, (%rdi)
602+
; SSE41-64-NEXT: retq
603+
;
604+
; AVX-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
605+
; AVX-32: # %bb.0:
606+
; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %eax
607+
; AVX-32-NEXT: vpaddd %xmm0, %xmm0, %xmm0
608+
; AVX-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
609+
; AVX-32-NEXT: vpsubd %xmm0, %xmm1, %xmm0
610+
; AVX-32-NEXT: vpabsd %xmm0, %xmm0
611+
; AVX-32-NEXT: vmovq %xmm0, (%eax)
612+
; AVX-32-NEXT: retl
613+
;
614+
; AVX-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
615+
; AVX-64: # %bb.0:
616+
; AVX-64-NEXT: vpaddd %xmm0, %xmm0, %xmm0
617+
; AVX-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
618+
; AVX-64-NEXT: vpsubd %xmm0, %xmm1, %xmm0
619+
; AVX-64-NEXT: vpabsd %xmm0, %xmm0
620+
; AVX-64-NEXT: vmovq %xmm0, (%rdi)
621+
; AVX-64-NEXT: retq
622+
%i2 = mul <2 x i32> %x, <i32 -2, i32 -2>
623+
%i3 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %i2, i1 false)
624+
store <2 x i32> %i3, ptr %out
625+
ret void
626+
}
627+
declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1 immarg)

0 commit comments

Comments
 (0)