@@ -18291,8 +18291,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18291
18291
// D = A * B + C
18292
18292
// We need to specify one type for matrices AB and one for matrices CD.
18293
18293
SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18294
- // Some intrinsics expect "false" as an extra bool argument.
18295
- bool AppendExtraBoolArg = false;
18294
+ // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
18295
+ // There is no need for the variable opsel argument, so always set it to
18296
+ // "false".
18297
+ bool AppendFalseForOpselArg = false;
18296
18298
unsigned BuiltinWMMAOp;
18297
18299
18298
18300
switch (BuiltinID) {
@@ -18312,7 +18314,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18312
18314
break;
18313
18315
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18314
18316
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18315
- AppendExtraBoolArg = true;
18317
+ AppendFalseForOpselArg = true;
18316
18318
LLVM_FALLTHROUGH;
18317
18319
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18318
18320
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
@@ -18321,7 +18323,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18321
18323
break;
18322
18324
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18323
18325
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18324
- AppendExtraBoolArg = true;
18326
+ AppendFalseForOpselArg = true;
18325
18327
LLVM_FALLTHROUGH;
18326
18328
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18327
18329
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
@@ -18437,7 +18439,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18437
18439
SmallVector<Value *, 6> Args;
18438
18440
for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18439
18441
Args.push_back(EmitScalarExpr(E->getArg(i)));
18440
- if (AppendExtraBoolArg )
18442
+ if (AppendFalseForOpselArg )
18441
18443
Args.push_back(Builder.getFalse());
18442
18444
18443
18445
SmallVector<llvm::Type *, 6> ArgTypes;
0 commit comments