-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][True16][MC] true16 for v_cmpx_class_f16 #123251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
broxigarchen
merged 1 commit into
llvm:main
from
broxigarchen:main-merge-true16-vopc-mc-more-instruction-7
Jan 22, 2025
Merged
[AMDGPU][True16][MC] true16 for v_cmpx_class_f16 #123251
broxigarchen
merged 1 commit into
llvm:main
from
broxigarchen:main-merge-true16-vopc-mc-more-instruction-7
Jan 22, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
af0e595
to
8e30ccd
Compare
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Brox Chen (broxigarchen) ChangesTrue16 format for v_cmpx_class_f16. Update VOPCX_CLASS t16 and fake16 pseudo. Patch is 149.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123251.diff 29 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 91ad2cafe9b54b..0f80271686d4a6 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -957,41 +957,69 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
}
class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
- VOPC_Class_Profile<sched, src0VT, src1VT> {
+ VOPC_Class_Profile_Base<sched, src0VT, src1VT> {
let Outs64 = (outs );
let OutsSDWA = (outs );
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let AsmVOP3Base = "$src0_modifiers, $src1";
+ let HasDst = 0;
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
}
multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
def NAME : VOPC_Class_NoSdst_Profile<sched, f16>;
- def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
+ def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, f16> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
- let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1RC64 = VSrc_b32;
- let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let HasOpSel = 1;
+ let HasModifiers = 1; // All instructions at least have OpSel
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_16;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
+
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
}
- def _fake16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
+ def _fake16 : VOPC_Class_NoSdst_Profile<sched, f16, f16> {
let IsTrue16 = 1;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1RC64 = VSrc_b32;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}
@@ -1141,10 +1169,10 @@ multiclass VOPCX_CLASS_F16 <string opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
defm NAME : VOPCX_Class_Pseudos <opName, VOPC_I1_F16_I16, VOPC_F16_I16>;
}
- let OtherPredicates = [UseRealTrue16Insts] in {
+ let True16Predicate = UseRealTrue16Insts in {
defm _t16 : VOPCX_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, VOPC_F16_I16_t16>;
}
- let OtherPredicates = [UseFakeTrue16Insts] in {
+ let True16Predicate = UseFakeTrue16Insts in {
defm _fake16 : VOPCX_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, VOPC_F16_I16_fake16>;
}
}
@@ -2044,7 +2072,7 @@ defm V_CMPX_GT_U64 : VOPCX_Real_gfx11_gfx12<0x0dc>;
defm V_CMPX_NE_U64 : VOPCX_Real_gfx11_gfx12<0x0dd>;
defm V_CMPX_GE_U64 : VOPCX_Real_gfx11_gfx12<0x0de>;
defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>;
-defm V_CMPX_CLASS_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
+defm V_CMPX_CLASS_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11_gfx12<0x0fe>;
defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11_gfx12<0x0ff>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
index 60ec94446235ed..379142e84aabdb 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
@@ -3,47 +3,56 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
-v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
-v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
-v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x18,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x08,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x11,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
index fb2b28874bd04f..4d6928ecbbc767 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
@@ -2,14 +2,23 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
-v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x08,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x11,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
index 7a95d8cd53cde4..0d8dc8b1bbc8b4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
@@ -2,17 +2,17 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
-v_cmpx_class_f16_e64 v1, v2
-// GFX11: v_cmpx_class_f16_e64 v1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00]
+v_cmpx_class_f16_e64 v1.l, v2.l
+// GFX11: v_cmpx_class_f16_e64 v1.l, v2.l ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00]
-v_cmpx_class_f16_e64 v255, v2
-// GFX11: v_cmpx_class_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00]
+v_cmpx_class_f16_e64 v255.l, v2.l
+// GFX11: v_cmpx_class_f16_e64 v255.l, v2.l ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00]
-v_cmpx_class_f16_e64 s1, v2
-// GFX11: v_cmpx_class_f16_e64 s1, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x04,0x02,0x00]
+v_cmpx_class_f16_e64 s1, v2.l
+// GFX11: v_cmpx_class_f16_e64 s1, v2.l ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x04,0x02,0x00]
-v_cmpx_class_f16_e64 s105, v255
-// GFX11: v_cmpx_class_f16_e64 s105, v255 ; encoding: [0x7e,0x00,0xfd,0xd4,0x69,0xfe,0x03,0x00]
+v_cmpx_class_f16_e64 s105, v255.l
+// GFX11: v_cmpx_class_f16_e64 s105, v255.l ; encoding: [0x7e,0x00,0xfd,0xd4,0x69,0xfe,0x03,0x00]
v_cmpx_class_f16_e64 vcc_lo, s2
// GFX11: v_cmpx_class_f16_e64 vcc_lo, s2 ; encoding: [0x7e,0x00,0xfd,0xd4,0x6a,0x04,0x00,0x00]
@@ -47,8 +47,17 @@ v_cmpx_class_f16_e64 src_scc, vcc_lo
v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi
// GFX11: v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi ; encoding: [0x7e,0x01,0xfd,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
-v_cmpx_class_f16_e64 v1, 0.5
-// GFX11: v_cmpx_class_f16_e64 v1, 0.5 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xe1,0x01,0x00]
+v_cmpx_class_f16_e64 v1.l, 0.5
+// GFX11: v_cmpx_class_f16_e64 v1.l, 0.5 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xe1,0x01,0x00]
+
+v_cmpx_class_f16_e64 v1.h, v2.h
+// GFX11: v_cmpx_class_f16_e64 v1.h, v2.h ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_class_f16_e64 v255.h, v2.l
+// GFX11: v_cmpx_class_f16_e64 v255.h, v2.l ; encoding: [0x7e,0x08,0xfd,0xd4,0xff,0x05,0x02,0x00]
+
+v_cmpx_class_f16_e64 s105, v255.h
+// GFX11: v_cmpx_class_f16_e64 s105, v255.h ; encoding: [0x7e,0x10,0xfd,0xd4,0x69,0xfe,0x03,0x00]
v_cmpx_class_f32_e64 v1, v2
// GFX11: v_cmpx_class_f32_e64 v1, v2 ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
index 42d7c5ea600b41..d3eff378e630f2 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
@@ -2,50 +2,65 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
-v_cmpx_class_f16_e32 v1, v2
-// GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d]
+v_cmpx_class_f16 v1.l, v2.l
+// GFX11: v_cmpx_class_f16_e32 v1.l, v2.l ; encoding: [0x01,0x05,0xfa,0x7d]
-v_cmpx_class_f16 v127, v2
-// GFX11: v_cmpx_class_f16_e32 v127, v2 ; encoding: [0x7f,0x05,0xfa,0x7d]
+v_cmpx_class_f16 v127.l, v2.l
+// GFX11: v_cmpx_class_f16_e32 v127.l, v2.l ; encoding: [0x7f,0x05,0xfa,0x7d]
-v_cmpx_class_f16 s1, v2
-// GFX11: v_cmpx_class_f16_e32 s1, v2 ...
[truncated]
|
8e30ccd
to
9d98fd1
Compare
9d98fd1
to
b167fc1
Compare
arsenm
approved these changes
Jan 22, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
True16 format for v_cmpx_class_f16. Update VOPCX_CLASS t16 and fake16 pseudo.