Skip to content

Commit 03760ad

Browse files
committed
Reapply "[RISCV] Implement RISCVISD::SHL_ADD and move patterns into combine (#89263)"
Changes since original commit: * Rebase over improved test coverage for theadba * Revert change to use TargetConstant as it appears to prevent the uimm2 clause from matching in the XTheadBa patterns. * Fix an order-of-operands bug in the XTheadBa pattern visible in the new test coverage. Original commit message follows: This implements a RISCV-specific version of the SHL_ADD node proposed in #88791. If that lands, the infrastructure from this patch should seamlessly switch over to the generic DAG node. I'm posting this separately because I've run out of useful multiply strength reduction work to do without having a way to represent MUL X, 3/5/9 as a single instruction. The majority of this change is moving two sets of patterns out of tablegen and into the post-legalize combine. The major reason for this is that I have an upcoming change which needs to reuse the expansion logic, but it also helps common up some code between zba and the XTheadBa variants. On the test changes, there are a couple of major categories: * We chose a different lowering for mul x, 25. The new lowering involves one fewer register and the same critical path, so this seems like a win. * The order of the two multiplies changes in (3,5,9)*(3,5,9) in some cases. I don't believe this matters. * I'm removing the one-use restriction on the multiply. This restriction doesn't really make sense to me, and the test changes appear positive.
1 parent 1d7086e commit 03760ad

File tree

11 files changed

+140
-138
lines changed

11 files changed

+140
-138
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13416,12 +13416,28 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1341613416
return SDValue();
1341713417
uint64_t MulAmt = CNode->getZExtValue();
1341813418

13419-
// 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13420-
// Matched in tablegen, avoid perturbing patterns.
13421-
for (uint64_t Divisor : {3, 5, 9})
13422-
if (MulAmt % Divisor == 0 && isPowerOf2_64(MulAmt / Divisor))
13419+
for (uint64_t Divisor : {3, 5, 9}) {
13420+
if (MulAmt % Divisor != 0)
13421+
continue;
13422+
uint64_t MulAmt2 = MulAmt / Divisor;
13423+
// 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13424+
// Matched in tablegen, avoid perturbing patterns.
13425+
if (isPowerOf2_64(MulAmt2))
1342313426
return SDValue();
1342413427

13428+
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13429+
if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13430+
SDLoc DL(N);
13431+
SDValue X = DAG.getFreeze(N->getOperand(0));
13432+
SDValue Mul359 =
13433+
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13434+
DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13435+
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13436+
DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13437+
Mul359);
13438+
}
13439+
}
13440+
1342513441
// If this is a power 2 + 2/4/8, we can use a shift followed by a single
1342613442
// shXadd. First check if this a sum of two power of 2s because that's
1342713443
// easy. Then count how many zeros are up to the first bit.
@@ -13440,23 +13456,24 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1344013456
}
1344113457

1344213458
// 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13443-
// Matched in tablegen, avoid perturbing patterns.
13444-
switch (MulAmt) {
13445-
case 11:
13446-
case 13:
13447-
case 19:
13448-
case 21:
13449-
case 25:
13450-
case 27:
13451-
case 29:
13452-
case 37:
13453-
case 41:
13454-
case 45:
13455-
case 73:
13456-
case 91:
13457-
return SDValue();
13458-
default:
13459-
break;
13459+
// This is the two instruction form, there are also three instruction
13460+
// variants we could implement. e.g.
13461+
// (2^(1,2,3) * 3,5,9 + 1) << C2
13462+
// 2^(C1>3) * 3,5,9 +/- 1
13463+
for (uint64_t Divisor : {3, 5, 9}) {
13464+
uint64_t C = MulAmt - 1;
13465+
if (C <= Divisor)
13466+
continue;
13467+
unsigned TZ = llvm::countr_zero(C);
13468+
if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13469+
SDLoc DL(N);
13470+
SDValue X = DAG.getFreeze(N->getOperand(0));
13471+
SDValue Mul359 =
13472+
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13473+
DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13474+
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13475+
DAG.getConstant(TZ, DL, VT), X);
13476+
}
1346013477
}
1346113478

1346213479
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
@@ -19669,6 +19686,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1966919686
NODE_NAME_CASE(LLA)
1967019687
NODE_NAME_CASE(ADD_TPREL)
1967119688
NODE_NAME_CASE(MULHSU)
19689+
NODE_NAME_CASE(SHL_ADD)
1967219690
NODE_NAME_CASE(SLLW)
1967319691
NODE_NAME_CASE(SRAW)
1967419692
NODE_NAME_CASE(SRLW)

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ enum NodeType : unsigned {
5959

6060
// Multiply high for signedxunsigned.
6161
MULHSU,
62+
63+
// Represents (ADD (SHL a, b), c) with the arguments appearing in the order
64+
// a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with zba
65+
// or addsl with XTheadBa.
66+
SHL_ADD,
67+
6268
// RV64I shifts, directly matching the semantics of the named RISC-V
6369
// instructions.
6470
SLLW,

llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,8 @@ multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction,
538538
let Predicates = [HasVendorXTHeadBa] in {
539539
def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)),
540540
(TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
541+
def : Pat<(XLenVT (riscv_shl_add GPR:$rs1, uimm2:$uimm2, GPR:$rs2)),
542+
(TH_ADDSL GPR:$rs2, GPR:$rs1, uimm2:$uimm2)>;
541543

542544
// Reuse complex patterns from StdExtZba
543545
def : Pat<(add_non_imm12 sh1add_op:$rs1, (XLenVT GPR:$rs2)),
@@ -581,30 +583,6 @@ def : Pat<(mul (XLenVT GPR:$r), C9LeftShift:$i),
581583
(SLLI (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)),
582584
(TrailingZeros C9LeftShift:$i))>;
583585

584-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
585-
(TH_ADDSL GPR:$r, (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)), 1)>;
586-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
587-
(TH_ADDSL GPR:$r, (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), 1)>;
588-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
589-
(TH_ADDSL GPR:$r, (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 1)), 2)>;
590-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
591-
(TH_ADDSL GPR:$r, (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)), 2)>;
592-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
593-
(TH_ADDSL GPR:$r, (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), 2)>;
594-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
595-
(TH_ADDSL (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)),
596-
(XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)), 2)>;
597-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
598-
(TH_ADDSL GPR:$r, (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)), 3)>;
599-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
600-
(TH_ADDSL GPR:$r, (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), 3)>;
601-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
602-
(TH_ADDSL (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), 1)>;
603-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
604-
(TH_ADDSL (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), 2)>;
605-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
606-
(TH_ADDSL (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 3)), 3)>;
607-
608586
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
609587
(SLLI (XLenVT (TH_ADDSL (XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)),
610588
(XLenVT (TH_ADDSL GPR:$r, GPR:$r, 2)), 2)), 3)>;

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 22 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,24 @@
2626
// Operand and SDNode transformation definitions.
2727
//===----------------------------------------------------------------------===//
2828

29-
def riscv_clzw : SDNode<"RISCVISD::CLZW", SDT_RISCVIntUnaryOpW>;
30-
def riscv_ctzw : SDNode<"RISCVISD::CTZW", SDT_RISCVIntUnaryOpW>;
31-
def riscv_rolw : SDNode<"RISCVISD::ROLW", SDT_RISCVIntBinOpW>;
32-
def riscv_rorw : SDNode<"RISCVISD::RORW", SDT_RISCVIntBinOpW>;
33-
def riscv_brev8 : SDNode<"RISCVISD::BREV8", SDTIntUnaryOp>;
34-
def riscv_orc_b : SDNode<"RISCVISD::ORC_B", SDTIntUnaryOp>;
35-
def riscv_zip : SDNode<"RISCVISD::ZIP", SDTIntUnaryOp>;
36-
def riscv_unzip : SDNode<"RISCVISD::UNZIP", SDTIntUnaryOp>;
37-
def riscv_absw : SDNode<"RISCVISD::ABSW", SDTIntUnaryOp>;
38-
def riscv_clmul : SDNode<"RISCVISD::CLMUL", SDTIntBinOp>;
39-
def riscv_clmulh : SDNode<"RISCVISD::CLMULH", SDTIntBinOp>;
40-
def riscv_clmulr : SDNode<"RISCVISD::CLMULR", SDTIntBinOp>;
29+
def SDTIntShiftAddOp : SDTypeProfile<1, 3, [ // shl_add
30+
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisInt<2>,
31+
SDTCisInt<3>
32+
]>;
33+
34+
def riscv_shl_add : SDNode<"RISCVISD::SHL_ADD", SDTIntShiftAddOp>;
35+
def riscv_clzw : SDNode<"RISCVISD::CLZW", SDT_RISCVIntUnaryOpW>;
36+
def riscv_ctzw : SDNode<"RISCVISD::CTZW", SDT_RISCVIntUnaryOpW>;
37+
def riscv_rolw : SDNode<"RISCVISD::ROLW", SDT_RISCVIntBinOpW>;
38+
def riscv_rorw : SDNode<"RISCVISD::RORW", SDT_RISCVIntBinOpW>;
39+
def riscv_brev8 : SDNode<"RISCVISD::BREV8", SDTIntUnaryOp>;
40+
def riscv_orc_b : SDNode<"RISCVISD::ORC_B", SDTIntUnaryOp>;
41+
def riscv_zip : SDNode<"RISCVISD::ZIP", SDTIntUnaryOp>;
42+
def riscv_unzip : SDNode<"RISCVISD::UNZIP", SDTIntUnaryOp>;
43+
def riscv_absw : SDNode<"RISCVISD::ABSW", SDTIntUnaryOp>;
44+
def riscv_clmul : SDNode<"RISCVISD::CLMUL", SDTIntBinOp>;
45+
def riscv_clmulh : SDNode<"RISCVISD::CLMULH", SDTIntBinOp>;
46+
def riscv_clmulr : SDNode<"RISCVISD::CLMULR", SDTIntBinOp>;
4147

4248
def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
4349
let Name = "UImmLog2XLenHalf";
@@ -678,6 +684,8 @@ foreach i = {1,2,3} in {
678684
defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
679685
def : Pat<(XLenVT (add_like_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)),
680686
(shxadd GPR:$rs1, GPR:$rs2)>;
687+
def : Pat<(XLenVT (riscv_shl_add GPR:$rs1, (XLenVT i), GPR:$rs2)),
688+
(shxadd GPR:$rs1, GPR:$rs2)>;
681689

682690
defvar pat = !cast<ComplexPattern>("sh"#i#"add_op");
683691
// More complex cases use a ComplexPattern.
@@ -721,31 +729,6 @@ def : Pat<(mul (XLenVT GPR:$r), C9LeftShift:$i),
721729
(SLLI (XLenVT (SH3ADD GPR:$r, GPR:$r)),
722730
(TrailingZeros C9LeftShift:$i))>;
723731

724-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
725-
(SH1ADD (XLenVT (SH2ADD GPR:$r, GPR:$r)), GPR:$r)>;
726-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
727-
(SH1ADD (XLenVT (SH3ADD GPR:$r, GPR:$r)), GPR:$r)>;
728-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
729-
(SH2ADD (XLenVT (SH1ADD GPR:$r, GPR:$r)), GPR:$r)>;
730-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
731-
(SH2ADD (XLenVT (SH2ADD GPR:$r, GPR:$r)), GPR:$r)>;
732-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
733-
(SH2ADD (XLenVT (SH3ADD GPR:$r, GPR:$r)), GPR:$r)>;
734-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
735-
(SH3ADD (XLenVT (SH1ADD GPR:$r, GPR:$r)), GPR:$r)>;
736-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
737-
(SH3ADD (XLenVT (SH2ADD GPR:$r, GPR:$r)), GPR:$r)>;
738-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
739-
(SH3ADD (XLenVT (SH3ADD GPR:$r, GPR:$r)), GPR:$r)>;
740-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
741-
(SH1ADD (XLenVT (SH3ADD GPR:$r, GPR:$r)),
742-
(XLenVT (SH3ADD GPR:$r, GPR:$r)))>;
743-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
744-
(SH2ADD (XLenVT (SH3ADD GPR:$r, GPR:$r)),
745-
(XLenVT (SH3ADD GPR:$r, GPR:$r)))>;
746-
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
747-
(SH3ADD (XLenVT (SH3ADD GPR:$r, GPR:$r)),
748-
(XLenVT (SH3ADD GPR:$r, GPR:$r)))>;
749732
} // Predicates = [HasStdExtZba]
750733

751734
let Predicates = [HasStdExtZba, IsRV64] in {
@@ -881,6 +864,8 @@ foreach i = {1,2,3} in {
881864
defvar shxadd = !cast<Instruction>("SH"#i#"ADD");
882865
def : Pat<(i32 (add_like_non_imm12 (shl GPR:$rs1, (i64 i)), GPR:$rs2)),
883866
(shxadd GPR:$rs1, GPR:$rs2)>;
867+
def : Pat<(i32 (riscv_shl_add GPR:$rs1, (i32 i), GPR:$rs2)),
868+
(shxadd GPR:$rs1, GPR:$rs2)>;
884869
}
885870
}
886871

llvm/test/CodeGen/RISCV/addimm-mulimm.ll

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -251,10 +251,12 @@ define i64 @add_mul_combine_reject_c3(i64 %x) {
251251
; RV32IMB-LABEL: add_mul_combine_reject_c3:
252252
; RV32IMB: # %bb.0:
253253
; RV32IMB-NEXT: li a2, 73
254-
; RV32IMB-NEXT: mul a1, a1, a2
255-
; RV32IMB-NEXT: mulhu a3, a0, a2
256-
; RV32IMB-NEXT: add a1, a3, a1
257-
; RV32IMB-NEXT: mul a2, a0, a2
254+
; RV32IMB-NEXT: mulhu a2, a0, a2
255+
; RV32IMB-NEXT: sh3add a3, a1, a1
256+
; RV32IMB-NEXT: sh3add a1, a3, a1
257+
; RV32IMB-NEXT: add a1, a2, a1
258+
; RV32IMB-NEXT: sh3add a2, a0, a0
259+
; RV32IMB-NEXT: sh3add a2, a2, a0
258260
; RV32IMB-NEXT: lui a0, 18
259261
; RV32IMB-NEXT: addi a0, a0, -728
260262
; RV32IMB-NEXT: add a0, a2, a0
@@ -518,10 +520,12 @@ define i64 @add_mul_combine_reject_g3(i64 %x) {
518520
; RV32IMB-LABEL: add_mul_combine_reject_g3:
519521
; RV32IMB: # %bb.0:
520522
; RV32IMB-NEXT: li a2, 73
521-
; RV32IMB-NEXT: mul a1, a1, a2
522-
; RV32IMB-NEXT: mulhu a3, a0, a2
523-
; RV32IMB-NEXT: add a1, a3, a1
524-
; RV32IMB-NEXT: mul a2, a0, a2
523+
; RV32IMB-NEXT: mulhu a2, a0, a2
524+
; RV32IMB-NEXT: sh3add a3, a1, a1
525+
; RV32IMB-NEXT: sh3add a1, a3, a1
526+
; RV32IMB-NEXT: add a1, a2, a1
527+
; RV32IMB-NEXT: sh3add a2, a0, a0
528+
; RV32IMB-NEXT: sh3add a2, a2, a0
525529
; RV32IMB-NEXT: lui a0, 2
526530
; RV32IMB-NEXT: addi a0, a0, -882
527531
; RV32IMB-NEXT: add a0, a2, a0

llvm/test/CodeGen/RISCV/rv32zba.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,8 @@ define i32 @mul25(i32 %a) {
407407
;
408408
; RV32ZBA-LABEL: mul25:
409409
; RV32ZBA: # %bb.0:
410-
; RV32ZBA-NEXT: sh1add a1, a0, a0
411-
; RV32ZBA-NEXT: sh3add a0, a1, a0
410+
; RV32ZBA-NEXT: sh2add a0, a0, a0
411+
; RV32ZBA-NEXT: sh2add a0, a0, a0
412412
; RV32ZBA-NEXT: ret
413413
%c = mul i32 %a, 25
414414
ret i32 %c
@@ -455,8 +455,8 @@ define i32 @mul27(i32 %a) {
455455
;
456456
; RV32ZBA-LABEL: mul27:
457457
; RV32ZBA: # %bb.0:
458-
; RV32ZBA-NEXT: sh3add a0, a0, a0
459458
; RV32ZBA-NEXT: sh1add a0, a0, a0
459+
; RV32ZBA-NEXT: sh3add a0, a0, a0
460460
; RV32ZBA-NEXT: ret
461461
%c = mul i32 %a, 27
462462
ret i32 %c
@@ -471,8 +471,8 @@ define i32 @mul45(i32 %a) {
471471
;
472472
; RV32ZBA-LABEL: mul45:
473473
; RV32ZBA: # %bb.0:
474-
; RV32ZBA-NEXT: sh3add a0, a0, a0
475474
; RV32ZBA-NEXT: sh2add a0, a0, a0
475+
; RV32ZBA-NEXT: sh3add a0, a0, a0
476476
; RV32ZBA-NEXT: ret
477477
%c = mul i32 %a, 45
478478
ret i32 %c

llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -963,8 +963,8 @@ define i64 @mul25(i64 %a) {
963963
;
964964
; RV64ZBA-LABEL: mul25:
965965
; RV64ZBA: # %bb.0:
966-
; RV64ZBA-NEXT: sh1add a1, a0, a0
967-
; RV64ZBA-NEXT: sh3add a0, a1, a0
966+
; RV64ZBA-NEXT: sh2add a0, a0, a0
967+
; RV64ZBA-NEXT: sh2add a0, a0, a0
968968
; RV64ZBA-NEXT: ret
969969
%c = mul i64 %a, 25
970970
ret i64 %c
@@ -1011,8 +1011,8 @@ define i64 @mul27(i64 %a) {
10111011
;
10121012
; RV64ZBA-LABEL: mul27:
10131013
; RV64ZBA: # %bb.0:
1014-
; RV64ZBA-NEXT: sh3add a0, a0, a0
10151014
; RV64ZBA-NEXT: sh1add a0, a0, a0
1015+
; RV64ZBA-NEXT: sh3add a0, a0, a0
10161016
; RV64ZBA-NEXT: ret
10171017
%c = mul i64 %a, 27
10181018
ret i64 %c
@@ -1027,8 +1027,8 @@ define i64 @mul45(i64 %a) {
10271027
;
10281028
; RV64ZBA-LABEL: mul45:
10291029
; RV64ZBA: # %bb.0:
1030-
; RV64ZBA-NEXT: sh3add a0, a0, a0
10311030
; RV64ZBA-NEXT: sh2add a0, a0, a0
1031+
; RV64ZBA-NEXT: sh3add a0, a0, a0
10321032
; RV64ZBA-NEXT: ret
10331033
%c = mul i64 %a, 45
10341034
ret i64 %c

llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -731,12 +731,13 @@ define zeroext i1 @smulo2.i64(i64 %v1, ptr %res) {
731731
; RV64ZBA-LABEL: smulo2.i64:
732732
; RV64ZBA: # %bb.0: # %entry
733733
; RV64ZBA-NEXT: li a2, 13
734-
; RV64ZBA-NEXT: mulh a3, a0, a2
735-
; RV64ZBA-NEXT: mul a2, a0, a2
736-
; RV64ZBA-NEXT: srai a0, a2, 63
737-
; RV64ZBA-NEXT: xor a0, a3, a0
734+
; RV64ZBA-NEXT: mulh a2, a0, a2
735+
; RV64ZBA-NEXT: sh1add a3, a0, a0
736+
; RV64ZBA-NEXT: sh2add a3, a3, a0
737+
; RV64ZBA-NEXT: srai a0, a3, 63
738+
; RV64ZBA-NEXT: xor a0, a2, a0
738739
; RV64ZBA-NEXT: snez a0, a0
739-
; RV64ZBA-NEXT: sd a2, 0(a1)
740+
; RV64ZBA-NEXT: sd a3, 0(a1)
740741
; RV64ZBA-NEXT: ret
741742
;
742743
; RV64ZICOND-LABEL: smulo2.i64:
@@ -925,10 +926,11 @@ define zeroext i1 @umulo2.i64(i64 %v1, ptr %res) {
925926
;
926927
; RV64ZBA-LABEL: umulo2.i64:
927928
; RV64ZBA: # %bb.0: # %entry
928-
; RV64ZBA-NEXT: li a3, 13
929-
; RV64ZBA-NEXT: mulhu a2, a0, a3
929+
; RV64ZBA-NEXT: li a2, 13
930+
; RV64ZBA-NEXT: mulhu a2, a0, a2
930931
; RV64ZBA-NEXT: snez a2, a2
931-
; RV64ZBA-NEXT: mul a0, a0, a3
932+
; RV64ZBA-NEXT: sh1add a3, a0, a0
933+
; RV64ZBA-NEXT: sh2add a0, a3, a0
932934
; RV64ZBA-NEXT: sd a0, 0(a1)
933935
; RV64ZBA-NEXT: mv a0, a2
934936
; RV64ZBA-NEXT: ret

llvm/test/CodeGen/RISCV/rv64xtheadba.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,8 +389,8 @@ define i64 @mul27(i64 %a) {
389389
;
390390
; RV64XTHEADBA-LABEL: mul27:
391391
; RV64XTHEADBA: # %bb.0:
392-
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3
393392
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 1
393+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3
394394
; RV64XTHEADBA-NEXT: ret
395395
%c = mul i64 %a, 27
396396
ret i64 %c
@@ -405,8 +405,8 @@ define i64 @mul45(i64 %a) {
405405
;
406406
; RV64XTHEADBA-LABEL: mul45:
407407
; RV64XTHEADBA: # %bb.0:
408-
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3
409408
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 2
409+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a0, 3
410410
; RV64XTHEADBA-NEXT: ret
411411
%c = mul i64 %a, 45
412412
ret i64 %c

0 commit comments

Comments
 (0)