@@ -3461,6 +3461,62 @@ std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm,
3461
3461
llvm_unreachable (" covered subregister switch" );
3462
3462
}
3463
3463
3464
+ static unsigned getNewFMAAKInst (const GCNSubtarget &ST, unsigned Opc) {
3465
+ switch (Opc) {
3466
+ case AMDGPU::V_MAC_F16_e32:
3467
+ case AMDGPU::V_MAC_F16_e64:
3468
+ case AMDGPU::V_MAD_F16_e64:
3469
+ return AMDGPU::V_MADAK_F16;
3470
+ case AMDGPU::V_MAC_F32_e32:
3471
+ case AMDGPU::V_MAC_F32_e64:
3472
+ case AMDGPU::V_MAD_F32_e64:
3473
+ return AMDGPU::V_MADAK_F32;
3474
+ case AMDGPU::V_FMAC_F32_e32:
3475
+ case AMDGPU::V_FMAC_F32_e64:
3476
+ case AMDGPU::V_FMA_F32_e64:
3477
+ return AMDGPU::V_FMAAK_F32;
3478
+ case AMDGPU::V_FMAC_F16_e32:
3479
+ case AMDGPU::V_FMAC_F16_e64:
3480
+ case AMDGPU::V_FMAC_F16_t16_e64:
3481
+ case AMDGPU::V_FMAC_F16_fake16_e64:
3482
+ case AMDGPU::V_FMA_F16_e64:
3483
+ return ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3484
+ ? AMDGPU::V_FMAAK_F16_t16
3485
+ : AMDGPU::V_FMAAK_F16_fake16
3486
+ : AMDGPU::V_FMAAK_F16;
3487
+ default :
3488
+ llvm_unreachable (" invalid instruction" );
3489
+ }
3490
+ }
3491
+
3492
+ static unsigned getNewFMAMKInst (const GCNSubtarget &ST, unsigned Opc) {
3493
+ switch (Opc) {
3494
+ case AMDGPU::V_MAC_F16_e32:
3495
+ case AMDGPU::V_MAC_F16_e64:
3496
+ case AMDGPU::V_MAD_F16_e64:
3497
+ return AMDGPU::V_MADMK_F16;
3498
+ case AMDGPU::V_MAC_F32_e32:
3499
+ case AMDGPU::V_MAC_F32_e64:
3500
+ case AMDGPU::V_MAD_F32_e64:
3501
+ return AMDGPU::V_MADMK_F32;
3502
+ case AMDGPU::V_FMAC_F32_e32:
3503
+ case AMDGPU::V_FMAC_F32_e64:
3504
+ case AMDGPU::V_FMA_F32_e64:
3505
+ return AMDGPU::V_FMAMK_F32;
3506
+ case AMDGPU::V_FMAC_F16_e32:
3507
+ case AMDGPU::V_FMAC_F16_e64:
3508
+ case AMDGPU::V_FMAC_F16_t16_e64:
3509
+ case AMDGPU::V_FMAC_F16_fake16_e64:
3510
+ case AMDGPU::V_FMA_F16_e64:
3511
+ return ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3512
+ ? AMDGPU::V_FMAMK_F16_t16
3513
+ : AMDGPU::V_FMAMK_F16_fake16
3514
+ : AMDGPU::V_FMAMK_F16;
3515
+ default :
3516
+ llvm_unreachable (" invalid instruction" );
3517
+ }
3518
+ }
3519
+
3464
3520
bool SIInstrInfo::foldImmediate (MachineInstr &UseMI, MachineInstr &DefMI,
3465
3521
Register Reg, MachineRegisterInfo *MRI) const {
3466
3522
if (!MRI->hasOneNonDBGUse (Reg))
@@ -3588,13 +3644,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3588
3644
!isInlineConstant (Def->getOperand (1 )))
3589
3645
return false ;
3590
3646
3591
- unsigned NewOpc =
3592
- IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3593
- : ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3594
- ? AMDGPU::V_FMAMK_F16_t16
3595
- : AMDGPU::V_FMAMK_F16_fake16
3596
- : AMDGPU::V_FMAMK_F16)
3597
- : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
3647
+ unsigned NewOpc = getNewFMAMKInst (ST, Opc);
3598
3648
if (pseudoToMCOpcode (NewOpc) == -1 )
3599
3649
return false ;
3600
3650
@@ -3671,13 +3721,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3671
3721
}
3672
3722
}
3673
3723
3674
- unsigned NewOpc =
3675
- IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3676
- : ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3677
- ? AMDGPU::V_FMAAK_F16_t16
3678
- : AMDGPU::V_FMAAK_F16_fake16
3679
- : AMDGPU::V_FMAAK_F16)
3680
- : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
3724
+ unsigned NewOpc = getNewFMAAKInst (ST, Opc);
3681
3725
if (pseudoToMCOpcode (NewOpc) == -1 )
3682
3726
return false ;
3683
3727
@@ -4067,14 +4111,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
4067
4111
4068
4112
int64_t Imm;
4069
4113
if (!Src0Literal && getFoldableImm (Src2, Imm, &DefMI)) {
4070
- unsigned NewOpc =
4071
- IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts ()
4072
- ? ST.useRealTrue16Insts ()
4073
- ? AMDGPU::V_FMAAK_F16_t16
4074
- : AMDGPU::V_FMAAK_F16_fake16
4075
- : AMDGPU::V_FMAAK_F16)
4076
- : AMDGPU::V_FMAAK_F32)
4077
- : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
4114
+ unsigned NewOpc = getNewFMAAKInst (ST, Opc);
4078
4115
if (pseudoToMCOpcode (NewOpc) != -1 ) {
4079
4116
MIB = BuildMI (MBB, MI, MI.getDebugLoc (), get (NewOpc))
4080
4117
.add (*Dst)
@@ -4089,14 +4126,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
4089
4126
return MIB;
4090
4127
}
4091
4128
}
4092
- unsigned NewOpc = IsFMA
4093
- ? (IsF16 ? (ST.hasTrue16BitInsts ()
4094
- ? ST.useRealTrue16Insts ()
4095
- ? AMDGPU::V_FMAMK_F16_t16
4096
- : AMDGPU::V_FMAMK_F16_fake16
4097
- : AMDGPU::V_FMAMK_F16)
4098
- : AMDGPU::V_FMAMK_F32)
4099
- : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
4129
+ unsigned NewOpc = getNewFMAMKInst (ST, Opc);
4100
4130
if (!Src0Literal && getFoldableImm (Src1, Imm, &DefMI)) {
4101
4131
if (pseudoToMCOpcode (NewOpc) != -1 ) {
4102
4132
MIB = BuildMI (MBB, MI, MI.getDebugLoc (), get (NewOpc))
0 commit comments