Skip to content

Commit 5408024

Browse files
committed
[X86] Move integer hadd/hsub formation into a helper function shared by combineAdd and combineSub.
There was a lot of duplicate code here for checking the VT and subtarget. Moving it into a helper avoids that. It also fixes a bug that combineAdd reused Op0/Op1 after a call to isHorizontalBinOp may have changed it. The new helper function has its own local version of Op0/Op1 that aren't shared by other code. Fixes PR46455. Reviewed By: spatel, bkramer Differential Revision: https://reviews.llvm.org/D83971
1 parent f78d9fc commit 5408024

File tree

2 files changed

+35
-29
lines changed

2 files changed

+35
-29
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -47606,6 +47606,30 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
4760647606
PMADDBuilder);
4760747607
}
4760847608

47609+
static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, SelectionDAG &DAG,
47610+
const X86Subtarget &Subtarget) {
47611+
EVT VT = N->getValueType(0);
47612+
SDValue Op0 = N->getOperand(0);
47613+
SDValue Op1 = N->getOperand(1);
47614+
bool IsAdd = N->getOpcode() == ISD::ADD;
47615+
assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode");
47616+
47617+
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
47618+
VT == MVT::v8i32) &&
47619+
Subtarget.hasSSSE3() &&
47620+
isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd)) {
47621+
auto HOpBuilder = [IsAdd](SelectionDAG &DAG, const SDLoc &DL,
47622+
ArrayRef<SDValue> Ops) {
47623+
return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB,
47624+
DL, Ops[0].getValueType(), Ops);
47625+
};
47626+
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
47627+
HOpBuilder);
47628+
}
47629+
47630+
return SDValue();
47631+
}
47632+
4760947633
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
4761047634
TargetLowering::DAGCombinerInfo &DCI,
4761147635
const X86Subtarget &Subtarget) {
@@ -47619,17 +47643,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
4761947643
return MAdd;
4762047644

4762147645
// Try to synthesize horizontal adds from adds of shuffles.
47622-
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
47623-
VT == MVT::v8i32) &&
47624-
Subtarget.hasSSSE3() &&
47625-
isHorizontalBinOp(Op0, Op1, DAG, Subtarget, true)) {
47626-
auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
47627-
ArrayRef<SDValue> Ops) {
47628-
return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
47629-
};
47630-
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
47631-
HADDBuilder);
47632-
}
47646+
if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget))
47647+
return V;
4763347648

4763447649
// If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into
4763547650
// (sub Y, (sext (vXi1 X))).
@@ -47802,18 +47817,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
4780247817
}
4780347818

4780447819
// Try to synthesize horizontal subs from subs of shuffles.
47805-
EVT VT = N->getValueType(0);
47806-
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
47807-
VT == MVT::v8i32) &&
47808-
Subtarget.hasSSSE3() &&
47809-
isHorizontalBinOp(Op0, Op1, DAG, Subtarget, false)) {
47810-
auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
47811-
ArrayRef<SDValue> Ops) {
47812-
return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
47813-
};
47814-
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
47815-
HSUBBuilder);
47816-
}
47820+
if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget))
47821+
return V;
4781747822

4781847823
// Try to create PSUBUS if SUB's argument is max/min
4781947824
if (SDValue V = combineSubToSubus(N, DAG, Subtarget))

llvm/test/CodeGen/X86/pr46455.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ define void @EntryModule(i8** %buffer_table) {
88
; CHECK-NEXT: movq (%rdi), %rax
99
; CHECK-NEXT: movq 24(%rdi), %rcx
1010
; CHECK-NEXT: vcmpneqps (%rax), %ymm0, %ymm0
11-
; CHECK-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm1
12-
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,0,1]
13-
; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3]
14-
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
15-
; CHECK-NEXT: vpsubd %xmm0, %xmm1, %xmm0
16-
; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0
11+
; CHECK-NEXT: vpsrld $31, %xmm0, %xmm1
12+
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
13+
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
14+
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
15+
; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm1
16+
; CHECK-NEXT: vpsubd %xmm0, %xmm2, %xmm0
17+
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
1718
; CHECK-NEXT: vmovd %xmm0, (%rcx)
1819
; CHECK-NEXT: vzeroupper
1920
; CHECK-NEXT: retq

0 commit comments

Comments
 (0)