Skip to content

[AMDGPU][True16][MC] true16 for v_cmpx_class_f16 #123251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

broxigarchen
Copy link
Contributor

@broxigarchen broxigarchen commented Jan 16, 2025

Add True16 format support for v_cmpx_class_f16, and update the VOPCX_CLASS t16 and fake16 pseudos accordingly.

@broxigarchen broxigarchen force-pushed the main-merge-true16-vopc-mc-more-instruction-7 branch from af0e595 to 8e30ccd Compare January 21, 2025 19:34
@broxigarchen broxigarchen marked this pull request as ready for review January 21, 2025 19:39
@llvmbot llvmbot added backend:AMDGPU mc Machine (object) code labels Jan 21, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 21, 2025

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Brox Chen (broxigarchen)

Changes

Add True16 format support for v_cmpx_class_f16, and update the VOPCX_CLASS t16 and fake16 pseudos accordingly.


Patch is 149.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123251.diff

29 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+47-19)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s (+37-28)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s (+15-6)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s (+19-10)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s (+45-30)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s (+37-28)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s (+15-6)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s (+30-12)
  • (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s (+30-12)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s (+17-8)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s (+41-32)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s (+19-10)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s (+42-30)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s (+34-28)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s (+12-6)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s (+30-12)
  • (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s (+30-12)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopcx.txt (+40-14)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopcx.txt (+16-2)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopcx.txt (+22-5)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt (+70-15)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt (+52-14)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt (+28-2)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx.txt (+32-4)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt (+107-15)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt (+18-3)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt (+46-15)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt (+36-14)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt (+12-2)
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 91ad2cafe9b54b..0f80271686d4a6 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -957,41 +957,69 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
 }
 
 class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
-  VOPC_Class_Profile<sched, src0VT, src1VT> {
+  VOPC_Class_Profile_Base<sched, src0VT, src1VT> {
   let Outs64 = (outs );
   let OutsSDWA = (outs );
   let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                      Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);
-  let AsmVOP3Base = "$src0_modifiers, $src1";
+  let HasDst = 0;
   let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
   let EmitDst = 0;
 }
 
 multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
   def NAME : VOPC_Class_NoSdst_Profile<sched, f16>;
-  def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
+  def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, f16> {
     let IsTrue16 = 1;
     let IsRealTrue16 = 1;
-    let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src1RC64 = VSrc_b32;
-    let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
-    let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
-    let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+    let HasOpSel = 1;
+    let HasModifiers = 1; // All instructions at least have OpSel
+    let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
+    let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
+    let Src0VOP3DPP = VGPRSrc_16;
+    let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
+
+    let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+    let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+    let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+    let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+    let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
   }
-  def _fake16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
+  def _fake16 : VOPC_Class_NoSdst_Profile<sched, f16, f16> {
     let IsTrue16 = 1;
+    let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src1RC64 = VSrc_b32;
     let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
-    let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
-    let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
-    let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+    let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+    let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+    let Src0VOP3DPP = VGPRSrc_32;
+    let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+
+    let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
+    let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
+    let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
+    let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+    let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
   }
 }
 
@@ -1141,10 +1169,10 @@ multiclass VOPCX_CLASS_F16 <string opName> {
   let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
     defm NAME : VOPCX_Class_Pseudos <opName, VOPC_I1_F16_I16, VOPC_F16_I16>;
   }
-  let OtherPredicates = [UseRealTrue16Insts] in {
+  let True16Predicate = UseRealTrue16Insts in {
     defm _t16 : VOPCX_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, VOPC_F16_I16_t16>;
   }
-  let OtherPredicates = [UseFakeTrue16Insts] in {
+  let True16Predicate = UseFakeTrue16Insts in {
     defm _fake16 : VOPCX_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, VOPC_F16_I16_fake16>;
   }
 }
@@ -2044,7 +2072,7 @@ defm V_CMPX_GT_U64    : VOPCX_Real_gfx11_gfx12<0x0dc>;
 defm V_CMPX_NE_U64    : VOPCX_Real_gfx11_gfx12<0x0dd>;
 defm V_CMPX_GE_U64    : VOPCX_Real_gfx11_gfx12<0x0de>;
 defm V_CMPX_T_U64     : VOPCX_Real_gfx11<0x0df>;
-defm V_CMPX_CLASS_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
+defm V_CMPX_CLASS_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
 defm V_CMPX_CLASS_F32     : VOPCX_Real_gfx11_gfx12<0x0fe>;
 defm V_CMPX_CLASS_F64     : VOPCX_Real_gfx11_gfx12<0x0ff>;
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
index 60ec94446235ed..379142e84aabdb 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
@@ -3,47 +3,56 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 
 
-v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
 
-v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
 
-v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x01,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x18,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.l op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x08,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x11,0xfd,0xd4,0xfa,0xfe,0x03,0x20,0xff,0x6f,0x05,0x30]
 
 v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
index fb2b28874bd04f..4d6928ecbbc767 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopcx.s
@@ -2,14 +2,23 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 
-v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x01,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cmpx_class_f16_e64_dpp v1.h, v2.l op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x7e,0x08,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cmpx_class_f16_e64_dpp -|v255.l|, v255.h op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x7e,0x11,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00]
 
 v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
index 7a95d8cd53cde4..0d8dc8b1bbc8b4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
@@ -2,17 +2,17 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 
-v_cmpx_class_f16_e64 v1, v2
-// GFX11: v_cmpx_class_f16_e64 v1, v2             ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00]
+v_cmpx_class_f16_e64 v1.l, v2.l
+// GFX11: v_cmpx_class_f16_e64 v1.l, v2.l         ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x05,0x02,0x00]
 
-v_cmpx_class_f16_e64 v255, v2
-// GFX11: v_cmpx_class_f16_e64 v255, v2           ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00]
+v_cmpx_class_f16_e64 v255.l, v2.l
+// GFX11: v_cmpx_class_f16_e64 v255.l, v2.l       ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_class_f16_e64 s1, v2
-// GFX11: v_cmpx_class_f16_e64 s1, v2             ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x04,0x02,0x00]
+v_cmpx_class_f16_e64 s1, v2.l
+// GFX11: v_cmpx_class_f16_e64 s1, v2.l           ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0x04,0x02,0x00]
 
-v_cmpx_class_f16_e64 s105, v255
-// GFX11: v_cmpx_class_f16_e64 s105, v255         ; encoding: [0x7e,0x00,0xfd,0xd4,0x69,0xfe,0x03,0x00]
+v_cmpx_class_f16_e64 s105, v255.l
+// GFX11: v_cmpx_class_f16_e64 s105, v255.l       ; encoding: [0x7e,0x00,0xfd,0xd4,0x69,0xfe,0x03,0x00]
 
 v_cmpx_class_f16_e64 vcc_lo, s2
 // GFX11: v_cmpx_class_f16_e64 vcc_lo, s2         ; encoding: [0x7e,0x00,0xfd,0xd4,0x6a,0x04,0x00,0x00]
@@ -47,8 +47,17 @@ v_cmpx_class_f16_e64 src_scc, vcc_lo
 v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi
 // GFX11: v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi  ; encoding: [0x7e,0x01,0xfd,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
 
-v_cmpx_class_f16_e64 v1, 0.5
-// GFX11: v_cmpx_class_f16_e64 v1, 0.5            ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xe1,0x01,0x00]
+v_cmpx_class_f16_e64 v1.l, 0.5
+// GFX11: v_cmpx_class_f16_e64 v1.l, 0.5          ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xe1,0x01,0x00]
+
+v_cmpx_class_f16_e64 v1.h, v2.h
+// GFX11: v_cmpx_class_f16_e64 v1.h, v2.h         ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0x05,0x02,0x00]
+
+v_cmpx_class_f16_e64 v255.h, v2.l
+// GFX11: v_cmpx_class_f16_e64 v255.h, v2.l       ; encoding: [0x7e,0x08,0xfd,0xd4,0xff,0x05,0x02,0x00]
+
+v_cmpx_class_f16_e64 s105, v255.h
+// GFX11: v_cmpx_class_f16_e64 s105, v255.h       ; encoding: [0x7e,0x10,0xfd,0xd4,0x69,0xfe,0x03,0x00]
 
 v_cmpx_class_f32_e64 v1, v2
 // GFX11: v_cmpx_class_f32_e64 v1, v2             ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
index 42d7c5ea600b41..d3eff378e630f2 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
@@ -2,50 +2,65 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 
-v_cmpx_class_f16_e32 v1, v2
-// GFX11: v_cmpx_class_f16_e32 v1, v2             ; encoding: [0x01,0x05,0xfa,0x7d]
+v_cmpx_class_f16 v1.l, v2.l
+// GFX11: v_cmpx_class_f16_e32 v1.l, v2.l         ; encoding: [0x01,0x05,0xfa,0x7d]
 
-v_cmpx_class_f16 v127, v2
-// GFX11: v_cmpx_class_f16_e32 v127, v2           ; encoding: [0x7f,0x05,0xfa,0x7d]
+v_cmpx_class_f16 v127.l, v2.l
+// GFX11: v_cmpx_class_f16_e32 v127.l, v2.l       ; encoding: [0x7f,0x05,0xfa,0x7d]
 
-v_cmpx_class_f16 s1, v2
-// GFX11: v_cmpx_class_f16_e32 s1, v2         ...
[truncated]

@broxigarchen broxigarchen force-pushed the main-merge-true16-vopc-mc-more-instruction-7 branch from 8e30ccd to 9d98fd1 Compare January 21, 2025 19:42
@broxigarchen broxigarchen force-pushed the main-merge-true16-vopc-mc-more-instruction-7 branch from 9d98fd1 to b167fc1 Compare January 21, 2025 19:44
@broxigarchen broxigarchen requested review from kosarev, arsenm and Sisyph and removed request for arsenm January 21, 2025 21:52
@broxigarchen broxigarchen merged commit 1cf0af3 into llvm:main Jan 22, 2025
8 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants