Skip to content

Commit 86dfbca

Browse files
committed
Adjust overload types between clang and mlir
1 parent 684d08f commit 86dfbca

File tree

8 files changed

+71
-71
lines changed

8 files changed

+71
-71
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18341,14 +18341,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1834118341
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
1834218342
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
1834318343
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18344-
ArgsForMatchingMatrixTypes = {0, 2};
18344+
ArgsForMatchingMatrixTypes = {2, 0};
1834518345
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
1834618346
break;
1834718347
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
1834818348
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
1834918349
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
1835018350
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18351-
ArgsForMatchingMatrixTypes = {0, 2};
18351+
ArgsForMatchingMatrixTypes = {2, 0};
1835218352
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
1835318353
break;
1835418354
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
@@ -18357,7 +18357,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1835718357
LLVM_FALLTHROUGH;
1835818358
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
1835918359
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18360-
ArgsForMatchingMatrixTypes = {0, 2};
18360+
ArgsForMatchingMatrixTypes = {2, 0};
1836118361
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
1836218362
break;
1836318363
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
@@ -18366,56 +18366,56 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1836618366
LLVM_FALLTHROUGH;
1836718367
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
1836818368
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18369-
ArgsForMatchingMatrixTypes = {0, 2};
18369+
ArgsForMatchingMatrixTypes = {2, 0};
1837018370
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
1837118371
break;
1837218372
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
1837318373
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18374-
ArgsForMatchingMatrixTypes = {0, 2};
18374+
ArgsForMatchingMatrixTypes = {2, 0};
1837518375
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
1837618376
break;
1837718377
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
1837818378
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18379-
ArgsForMatchingMatrixTypes = {0, 2};
18379+
ArgsForMatchingMatrixTypes = {2, 0};
1838018380
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
1838118381
break;
1838218382
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
1838318383
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
1838418384
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
1838518385
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18386-
ArgsForMatchingMatrixTypes = {1, 4};
18386+
ArgsForMatchingMatrixTypes = {4, 1};
1838718387
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
1838818388
break;
1838918389
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
1839018390
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
1839118391
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
1839218392
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18393-
ArgsForMatchingMatrixTypes = {1, 4};
18393+
ArgsForMatchingMatrixTypes = {4, 1};
1839418394
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
1839518395
break;
1839618396
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
1839718397
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18398-
ArgsForMatchingMatrixTypes = {0, 2};
18398+
ArgsForMatchingMatrixTypes = {2, 0};
1839918399
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
1840018400
break;
1840118401
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
1840218402
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18403-
ArgsForMatchingMatrixTypes = {0, 2};
18403+
ArgsForMatchingMatrixTypes = {2, 0};
1840418404
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
1840518405
break;
1840618406
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
1840718407
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18408-
ArgsForMatchingMatrixTypes = {0, 2};
18408+
ArgsForMatchingMatrixTypes = {2, 0};
1840918409
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
1841018410
break;
1841118411
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
1841218412
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18413-
ArgsForMatchingMatrixTypes = {0, 2};
18413+
ArgsForMatchingMatrixTypes = {2, 0};
1841418414
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
1841518415
break;
1841618416
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
1841718417
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18418-
ArgsForMatchingMatrixTypes = {1, 4};
18418+
ArgsForMatchingMatrixTypes = {4, 1};
1841918419
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
1842018420
break;
1842118421
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ typedef int v8i __attribute__((ext_vector_type(8)));
1616

1717
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w32(
1818
// CHECK-GFX1200-NEXT: entry:
19-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x float> [[C:%.*]])
19+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x float> [[C:%.*]])
2020
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]]
2121
// CHECK-GFX1200-NEXT: ret void
2222
//
@@ -31,7 +31,7 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c)
3131

3232
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
3333
// CHECK-GFX1200-NEXT: entry:
34-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8bf16.v8f32(<8 x bfloat> [[A:%.*]], <8 x bfloat> [[B:%.*]], <8 x float> [[C:%.*]])
34+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8bf16(<8 x bfloat> [[A:%.*]], <8 x bfloat> [[B:%.*]], <8 x float> [[C:%.*]])
3535
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
3636
// CHECK-GFX1200-NEXT: ret void
3737
//
@@ -76,7 +76,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8bf* out, v8bf a, v8bf b, v
7676

7777
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
7878
// CHECK-GFX1200-NEXT: entry:
79-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v2i32.v8i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
79+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
8080
// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
8181
// CHECK-GFX1200-NEXT: ret void
8282
//
@@ -91,7 +91,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c)
9191

9292
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
9393
// CHECK-GFX1200-NEXT: entry:
94-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.i32.v8i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
94+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
9595
// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
9696
// CHECK-GFX1200-NEXT: ret void
9797
//
@@ -102,7 +102,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c)
102102

103103
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
104104
// CHECK-GFX1200-NEXT: entry:
105-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
105+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
106106
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
107107
// CHECK-GFX1200-NEXT: ret void
108108
//
@@ -113,7 +113,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8
113113

114114
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
115115
// CHECK-GFX1200-NEXT: entry:
116-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
116+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
117117
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
118118
// CHECK-GFX1200-NEXT: ret void
119119
//
@@ -124,7 +124,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8
124124

125125
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
126126
// CHECK-GFX1200-NEXT: entry:
127-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
127+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
128128
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
129129
// CHECK-GFX1200-NEXT: ret void
130130
//
@@ -135,7 +135,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8
135135

136136
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
137137
// CHECK-GFX1200-NEXT: entry:
138-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
138+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
139139
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
140140
// CHECK-GFX1200-NEXT: ret void
141141
//
@@ -146,7 +146,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8
146146

147147
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
148148
// CHECK-GFX1200-NEXT: entry:
149-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v2i32.v8i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
149+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
150150
// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
151151
// CHECK-GFX1200-NEXT: ret void
152152
//

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ typedef int v4i __attribute__((ext_vector_type(4)));
1515

1616
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w64(
1717
// CHECK-GFX1200-NEXT: entry:
18-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x float> [[C:%.*]])
18+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x float> [[C:%.*]])
1919
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]]
2020
// CHECK-GFX1200-NEXT: ret void
2121
//
@@ -30,7 +30,7 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c)
3030

3131
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
3232
// CHECK-GFX1200-NEXT: entry:
33-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4bf16.v4f32(<4 x bfloat> [[A:%.*]], <4 x bfloat> [[B:%.*]], <4 x float> [[C:%.*]])
33+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4bf16(<4 x bfloat> [[A:%.*]], <4 x bfloat> [[B:%.*]], <4 x float> [[C:%.*]])
3434
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
3535
// CHECK-GFX1200-NEXT: ret void
3636
//
@@ -75,7 +75,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4bf* out, v4bf a, v4bf b, v
7575

7676
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
7777
// CHECK-GFX1200-NEXT: entry:
78-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.i32.v4i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
78+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
7979
// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
8080
// CHECK-GFX1200-NEXT: ret void
8181
//
@@ -90,7 +90,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c)
9090

9191
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
9292
// CHECK-GFX1200-NEXT: entry:
93-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.i32.v4i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
93+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
9494
// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
9595
// CHECK-GFX1200-NEXT: ret void
9696
//
@@ -101,7 +101,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c)
101101

102102
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
103103
// CHECK-GFX1200-NEXT: entry:
104-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
104+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
105105
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
106106
// CHECK-GFX1200-NEXT: ret void
107107
//
@@ -112,7 +112,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4
112112

113113
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
114114
// CHECK-GFX1200-NEXT: entry:
115-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
115+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
116116
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
117117
// CHECK-GFX1200-NEXT: ret void
118118
//
@@ -123,7 +123,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4
123123

124124
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
125125
// CHECK-GFX1200-NEXT: entry:
126-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
126+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
127127
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
128128
// CHECK-GFX1200-NEXT: ret void
129129
//
@@ -134,7 +134,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4
134134

135135
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
136136
// CHECK-GFX1200-NEXT: entry:
137-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
137+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
138138
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
139139
// CHECK-GFX1200-NEXT: ret void
140140
//
@@ -145,7 +145,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4
145145

146146
// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
147147
// CHECK-GFX1200-NEXT: entry:
148-
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.i32.v4i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
148+
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
149149
// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
150150
// CHECK-GFX1200-NEXT: ret void
151151
//

0 commit comments

Comments
 (0)