-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][True16][MC] true16 for v_cmpx_xx_f16 #123419
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][True16][MC] true16 for v_cmpx_xx_f16 #123419
Conversation
31953bd
to
a078660
Compare
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) ChangesA bulk commit of true16 support for v_cmpx_xx_f16 instructions including: v_cmpx_f_f16 Patch is 1.65 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123419.diff 33 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index bba8aa570d2b58..ebc2d622bb16a3 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1924,22 +1924,22 @@ defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16"
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
-defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
+defm V_CMPX_F_F16 : VOPCX_Real_t16_and_fake16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
-defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
-defm V_CMPX_LE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
-defm V_CMPX_GT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
-defm V_CMPX_LG_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
-defm V_CMPX_GE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
-defm V_CMPX_O_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
-defm V_CMPX_U_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
-defm V_CMPX_NGE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
-defm V_CMPX_NLG_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
-defm V_CMPX_NGT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
-defm V_CMPX_NLE_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
-defm V_CMPX_NEQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
-defm V_CMPX_NLT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
-defm V_CMPX_T_F16_fake16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_fake16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
+defm V_CMPX_EQ_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
+defm V_CMPX_LE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
+defm V_CMPX_GT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
+defm V_CMPX_LG_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
+defm V_CMPX_GE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
+defm V_CMPX_O_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
+defm V_CMPX_U_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
+defm V_CMPX_NGE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
+defm V_CMPX_NLG_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
+defm V_CMPX_NGT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
+defm V_CMPX_NLE_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
+defm V_CMPX_NEQ_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
+defm V_CMPX_NLT_F16 : VOPCX_Real_t16_and_fake16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
+defm V_CMPX_T_F16 : VOPCX_Real_t16_and_fake16_gfx11<0x08f, "v_cmpx_t_f16", "V_CMPX_TRU_F16", "v_cmpx_tru_f16">;
defm V_CMPX_F_F32 : VOPCX_Real_gfx11<0x090>;
defm V_CMPX_LT_F32 : VOPCX_Real_gfx11_gfx12<0x091>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
index 60ec94446235ed..15ddb6374821c7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopcx.s
@@ -339,47 +339,56 @@ v_cmpx_eq_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_cmpx_f_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_f_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_f_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_f_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_cmpx_f_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_f_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x80,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmpx_f_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_f_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x80,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_cmpx_f_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_f_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x80,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmpx_f_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_f_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x80,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cmpx_f_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_f_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x80,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x80,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cmpx_f_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_f_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x80,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cmpx_f_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_f_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x80,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_f_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x80,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
v_cmpx_f_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmpx_f_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x90,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -507,47 +516,56 @@ v_cmpx_f_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_cmpx_f_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cmpx_f_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xc8,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:1
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:15
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:1
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:15
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:1
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:15
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_ge_f16_e64_dpp |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_ge_f16_e64_dpp -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.l| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cmpx_ge_f16_e64_dpp |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x01,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cmpx_ge_f16_e64_dpp |v1.h|, -v2.h row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cmpx_ge_f16_e64_dpp |v1.h|, -v2.h op_sel:[1,1] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x7e,0x19,0x86,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cmpx_ge_f16_e64_dpp -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x02,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cmpx_ge_f16_e64_dpp -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cmpx_ge_f16_e64_dpp -v1.h, |v2.l| op_sel:[1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x7e,0x0a,0x86,0xd4,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x83,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.h| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cmpx_ge_f16_e64_dpp -|v255.l|, -|v255.h| op_sel:[0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x93,0x86,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -759,47 +777,56 @@ v_cmpx_ge_u32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cmpx_gt_f16_e64_dpp v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l row_mirror
+// GFX11: v_cmpx_gt_f16_e64_dpp v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror
-// GFX11: v_cmpx_gt_f16_e64_dpp v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cmpx_gt_f16_e64_dpp v1.l, v2.l row_half_mirror
+// GFX11: v_cmpx_gt_f16_e64_dpp...
[truncated]
|
@@ -518,11 +518,11 @@ v_cmpx_eq_u64_e64 src_scc, exec | |||
v_cmpx_eq_u64_e64 0xaf123456, vcc |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing any tests for v_cmpx_eq_f16_e64 .l and .h in this file and several others. In one file I see .l but no .h tests for v_cmpx_eq_f16_e64.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks Joe for pointing this out! I will double check
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems both v_cmp_eq_f16
(from previous patch) and v_cmpx_eq_f16
(from this patch) are missing. Removed v_cmpx_eq_f16
from this patch and will open another patch to merge the missing two
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That sounds fine for asm. I see in the disasm tests we are missing tests for .h registers on v_cmpx_eq_f16. What is the plan for those?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Both asm,disasm test and t16 instruction definition for v_cmpx_eq_f16
are removed from this patch and I will add them in the next patch
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another patch for v_cmpx_eq_f16
is created here #124038
In fact, after fixing the script it seems v_cmp_eq_f16
is merged correctly so we don't need a patch for that anymore.
a078660
to
6301c5b
Compare
6301c5b
to
decabc3
Compare
rebased and resolved conflicts |
# GFX12-REAL16: v_cmpx_f_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7d,0x7f,0x77,0x39,0x05] | ||
# GFX12-FAKE16: v_cmpx_f_f16 v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x00,0x7d,0x7f,0x77,0x39,0x05] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@broxigarchen This shouldn't be here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. Seems I somehow still missed this.
I will create a new PR and cleanup the test altogether
A bulk commit of true16 support for v_cmpx_xx_f16 instructions including:
v_cmpx_f_f16
v_cmpx_le_f16
v_cmpx_gt_f16
v_cmpx_lg_f16
v_cmpx_ge_f16
v_cmpx_o_f16
v_cmpx_u_f16
v_cmpx_nge_f16
v_cmpx_nlg_f16
v_cmpx_ngt_f16
v_cmpx_nle_f16
v_cmpx_neq_f16
v_cmpx_nlt_f16
v_cmpx_t_f16
v_cmpx_eq_f16 is not in this patch and will be added in the following patch