Skip to content

Commit ac44630

Browse files
d0kzmodem
authored and committed
Revert "[mlir] Create a gpu.module operation for the GPU Dialect."
This reverts commit 4624a1e. Causing problems downstream. (cherry picked from commit 0133cc6)
1 parent c4a134a commit ac44630

24 files changed

+140
-235
lines changed

mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,12 @@ namespace mlir {
1919
class Location;
2020
class ModuleOp;
2121

22-
template <typename T>
23-
class OpPassBase;
24-
25-
namespace gpu {
26-
class GPUModuleOp;
27-
} // namespace gpu
28-
2922
namespace LLVM {
3023
class LLVMDialect;
3124
} // namespace LLVM
3225

26+
template <typename T> class OpPassBase;
27+
3328
using OwnedCubin = std::unique_ptr<std::vector<char>>;
3429
using CubinGenerator =
3530
std::function<OwnedCubin(const std::string &, Location, StringRef)>;
@@ -43,7 +38,7 @@ using CubinGenerator =
4338
/// attached as a string attribute named 'nvvm.cubin' to the kernel function.
4439
/// After the transformation, the body of the kernel function is removed (i.e.,
4540
/// it is turned into a declaration).
46-
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
41+
std::unique_ptr<OpPassBase<ModuleOp>>
4742
createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator);
4843

4944
/// Creates a pass to convert a gpu.launch_func operation into a sequence of

mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,15 @@ namespace mlir {
1414
class LLVMTypeConverter;
1515
class OwningRewritePatternList;
1616

17-
template <typename OpT>
18-
class OpPassBase;
19-
20-
namespace gpu {
21-
class GPUModuleOp;
22-
}
17+
class ModuleOp;
18+
template <typename OpT> class OpPassBase;
2319

2420
/// Collect a set of patterns to convert from the GPU dialect to NVVM.
2521
void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
2622
OwningRewritePatternList &patterns);
2723

2824
/// Creates a pass that lowers GPU dialect operations to NVVM counterparts.
29-
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass();
25+
std::unique_ptr<OpPassBase<ModuleOp>> createLowerGpuOpsToNVVMOpsPass();
3026

3127
} // namespace mlir
3228

mlir/include/mlir/Dialect/GPU/GPUOps.td

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -588,56 +588,4 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
588588
let printer = [{ p << getOperationName(); }];
589589
}
590590

591-
def GPU_GPUModuleOp : GPU_Op<"module", [
592-
IsolatedFromAbove, SymbolTable, Symbol,
593-
SingleBlockImplicitTerminator<"ModuleEndOp">
594-
]> {
595-
let summary = "A top level compilation unit containing code to be run on a GPU.";
596-
let description = [{
597-
GPU module contains code that is intended to be run on a GPU. A host device
598-
can launch this code through a gpu.launc_func that creates a fully
599-
qualified symbol through the gpu.module's symbol and a gpu.func symbol
600-
contained in the gpu.module.
601-
602-
The module's top-level scope is modeled by a single region with a single
603-
block. GPU modules are required to have a name that is used for symbol
604-
resolution by the gpu.launch_func operation.
605-
606-
Using an op with a region to define a GPU module enables "embedding" GPU
607-
modules with SIMT execution models in other dialects in a clean manner and
608-
allows filtering of code regions to execute passes on only code intended to
609-
or not intended to be run on the separate device.
610-
611-
```
612-
gpu.module @symbol_name {
613-
gpu.func {}
614-
...
615-
gpu.module_end
616-
}
617-
618-
```
619-
}];
620-
let builders = [OpBuilder<"Builder *builder, OperationState &result, "
621-
"StringRef name">];
622-
let parser = [{ return ::parseGPUModuleOp(parser, result); }];
623-
let printer = [{ return ::print(p, *this); }];
624-
let regions = (region SizedRegion<1>:$body);
625-
626-
// We need to ensure the block inside the region is properly terminated;
627-
// the auto-generated builders do not guarantee that.
628-
let skipDefaultBuilders = 1;
629-
}
630-
631-
def GPU_ModuleEndOp : GPU_Op<"module_end", [
632-
Terminator, HasParent<"GPUModuleOp">
633-
]> {
634-
let summary = "A pseudo op that marks the end of a gpu.module.";
635-
let description = [{
636-
This op terminates the only block inside the only region of a `gpu.module`.
637-
}];
638-
639-
let parser = [{ return success(); }];
640-
let printer = [{ p << getOperationName(); }];
641-
}
642-
643591
#endif // GPU_OPS

mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,18 @@ static constexpr const char *kCubinAnnotation = "nvvm.cubin";
4646
/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to
4747
/// GPU binary code, which is then attached as an attribute to the function. The
4848
/// function body is erased.
49-
class GpuKernelToCubinPass
50-
: public OperationPass<GpuKernelToCubinPass, gpu::GPUModuleOp> {
49+
class GpuKernelToCubinPass : public ModulePass<GpuKernelToCubinPass> {
5150
public:
5251
GpuKernelToCubinPass(
5352
CubinGenerator cubinGenerator = compilePtxToCubinForTesting)
5453
: cubinGenerator(cubinGenerator) {}
5554

56-
void runOnOperation() override {
57-
gpu::GPUModuleOp module = getOperation();
55+
void runOnModule() override {
56+
ModuleOp module = getModule();
57+
if (!module.getAttrOfType<UnitAttr>(
58+
gpu::GPUDialect::getKernelModuleAttrName()) ||
59+
!module.getName())
60+
return;
5861

5962
// Make sure the NVPTX target is initialized.
6063
LLVMInitializeNVPTXTarget();
@@ -68,8 +71,8 @@ class GpuKernelToCubinPass
6871

6972
// Translate the module to CUBIN and attach the result as attribute to the
7073
// module.
71-
if (auto cubinAttr = translateGPUModuleToCubinAnnotation(
72-
*llvmModule, module.getLoc(), module.getName()))
74+
if (auto cubinAttr = translateGpuModuleToCubinAnnotation(
75+
*llvmModule, module.getLoc(), *module.getName()))
7376
module.setAttr(kCubinAnnotation, cubinAttr);
7477
else
7578
signalPassFailure();
@@ -89,7 +92,7 @@ class GpuKernelToCubinPass
8992
StringRef name);
9093

9194
/// Translates llvmModule to cubin and returns the result as attribute.
92-
StringAttr translateGPUModuleToCubinAnnotation(llvm::Module &llvmModule,
95+
StringAttr translateGpuModuleToCubinAnnotation(llvm::Module &llvmModule,
9396
Location loc, StringRef name);
9497

9598
CubinGenerator cubinGenerator;
@@ -146,15 +149,15 @@ OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
146149
return cubinGenerator(ptx, loc, name);
147150
}
148151

149-
StringAttr GpuKernelToCubinPass::translateGPUModuleToCubinAnnotation(
152+
StringAttr GpuKernelToCubinPass::translateGpuModuleToCubinAnnotation(
150153
llvm::Module &llvmModule, Location loc, StringRef name) {
151154
auto cubin = convertModuleToCubin(llvmModule, loc, name);
152155
if (!cubin)
153156
return {};
154157
return StringAttr::get({cubin->data(), cubin->size()}, loc->getContext());
155158
}
156159

157-
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
160+
std::unique_ptr<OpPassBase<ModuleOp>>
158161
mlir::createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator) {
159162
return std::make_unique<GpuKernelToCubinPass>(cubinGenerator);
160163
}

mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,9 @@ class GpuLaunchFuncToCudaCallsPass
132132

133133
// GPU kernel modules are no longer necessary since we have a global
134134
// constant with the CUBIN data.
135-
for (auto m :
136-
llvm::make_early_inc_range(getModule().getOps<gpu::GPUModuleOp>()))
137-
m.erase();
135+
for (auto m : llvm::make_early_inc_range(getModule().getOps<ModuleOp>()))
136+
if (m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
137+
m.erase();
138138
}
139139

140140
private:
@@ -343,8 +343,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
343343
builder.getI32IntegerAttr(0));
344344
// Create an LLVM global with CUBIN extracted from the kernel annotation and
345345
// obtain a pointer to the first byte in it.
346-
auto kernelModule = getModule().lookupSymbol<gpu::GPUModuleOp>(
347-
launchOp.getKernelModuleName());
346+
auto kernelModule =
347+
getModule().lookupSymbol<ModuleOp>(launchOp.getKernelModuleName());
348348
assert(kernelModule && "expected a kernel module");
349349

350350
auto cubinAttr = kernelModule.getAttrOfType<StringAttr>(kCubinAnnotation);
@@ -354,7 +354,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
354354
return signalPassFailure();
355355
}
356356

357-
SmallString<128> nameBuffer(kernelModule.getName());
357+
assert(kernelModule.getName() && "expected a named module");
358+
SmallString<128> nameBuffer(*kernelModule.getName());
358359
nameBuffer.append(kCubinStorageSuffix);
359360
Value data = LLVM::createGlobalString(
360361
loc, builder, nameBuffer.str(), cubinAttr.getValue(),

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
200200
auto type = operand.getType().cast<LLVM::LLVMType>();
201201

202202
// Create shared memory array to store the warp reduction.
203-
auto module = operand.getDefiningOp()->getParentOfType<gpu::GPUModuleOp>();
203+
auto module = operand.getDefiningOp()->getParentOfType<ModuleOp>();
204204
assert(module && "op must belong to a module");
205205
Value sharedMemPtr =
206206
createSharedMemoryArray(loc, module, type, kWarpSize, rewriter);
@@ -391,10 +391,10 @@ struct GPUAllReduceOpLowering : public LLVMOpLowering {
391391
}
392392

393393
/// Creates a global array stored in shared memory.
394-
Value createSharedMemoryArray(Location loc, gpu::GPUModuleOp module,
394+
Value createSharedMemoryArray(Location loc, ModuleOp module,
395395
LLVM::LLVMType elementType, int numElements,
396396
ConversionPatternRewriter &rewriter) const {
397-
OpBuilder builder(module.body());
397+
OpBuilder builder(module.getBodyRegion());
398398

399399
auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements);
400400
StringRef name = "reduce_buffer";
@@ -699,11 +699,13 @@ struct GPUReturnOpLowering : public LLVMOpLowering {
699699
///
700700
/// This pass only handles device code and is not meant to be run on GPU host
701701
/// code.
702-
class LowerGpuOpsToNVVMOpsPass
703-
: public OperationPass<LowerGpuOpsToNVVMOpsPass, gpu::GPUModuleOp> {
702+
class LowerGpuOpsToNVVMOpsPass : public ModulePass<LowerGpuOpsToNVVMOpsPass> {
704703
public:
705-
void runOnOperation() override {
706-
gpu::GPUModuleOp m = getOperation();
704+
void runOnModule() override {
705+
ModuleOp m = getModule();
706+
if (!m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
707+
return;
708+
707709
OwningRewritePatternList patterns;
708710
NVVMTypeConverter converter(m.getContext());
709711
populateStdToLLVMConversionPatterns(converter, patterns);
@@ -716,7 +718,7 @@ class LowerGpuOpsToNVVMOpsPass
716718
target.addLegalDialect<LLVM::LLVMDialect>();
717719
target.addLegalDialect<NVVM::NVVMDialect>();
718720
// TODO(csigg): Remove once we support replacing non-root ops.
719-
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp, gpu::ModuleEndOp>();
721+
target.addLegalOp<gpu::YieldOp>();
720722
if (failed(applyPartialConversion(m, target, patterns, &converter)))
721723
signalPassFailure();
722724
}
@@ -748,8 +750,7 @@ void mlir::populateGpuToNVVMConversionPatterns(
748750
"__nv_exp");
749751
}
750752

751-
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
752-
mlir::createLowerGpuOpsToNVVMOpsPass() {
753+
std::unique_ptr<OpPassBase<ModuleOp>> mlir::createLowerGpuOpsToNVVMOpsPass() {
753754
return std::make_unique<LowerGpuOpsToNVVMOpsPass>();
754755
}
755756

mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,8 @@
1-
set(LLVM_TARGET_DEFINITIONS GPUToSPIRV.td)
2-
mlir_tablegen(GPUToSPIRV.cpp.inc -gen-rewriters)
3-
add_public_tablegen_target(MLIRGPUToSPIRVIncGen)
4-
51
add_llvm_library(MLIRGPUtoSPIRVTransforms
62
ConvertGPUToSPIRV.cpp
73
ConvertGPUToSPIRVPass.cpp
84
)
95

10-
add_dependencies(MLIRGPUtoSPIRVTransforms
11-
MLIRGPUToSPIRVIncGen)
12-
136
target_link_libraries(MLIRGPUtoSPIRVTransforms
147
MLIRGPU
158
MLIRIR

mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,27 @@ class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
6363
SmallVector<int32_t, 3> workGroupSizeAsInt32;
6464
};
6565

66-
/// Pattern to convert a gpu.module to a spv.module.
67-
class GPUModuleConversion final : public SPIRVOpLowering<gpu::GPUModuleOp> {
66+
/// Pattern to convert a module with gpu.kernel_module attribute to a
67+
/// spv.module.
68+
class KernelModuleConversion final : public SPIRVOpLowering<ModuleOp> {
6869
public:
69-
using SPIRVOpLowering<gpu::GPUModuleOp>::SPIRVOpLowering;
70+
using SPIRVOpLowering<ModuleOp>::SPIRVOpLowering;
7071

7172
PatternMatchResult
72-
matchAndRewrite(gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
73+
matchAndRewrite(ModuleOp moduleOp, ArrayRef<Value> operands,
74+
ConversionPatternRewriter &rewriter) const override;
75+
};
76+
77+
/// Pattern to convert a module terminator op to a terminator of spv.module op.
78+
// TODO: Move this into DRR, but that requires ModuleTerminatorOp to be defined
79+
// in ODS.
80+
class KernelModuleTerminatorConversion final
81+
: public SPIRVOpLowering<ModuleTerminatorOp> {
82+
public:
83+
using SPIRVOpLowering<ModuleTerminatorOp>::SPIRVOpLowering;
84+
85+
PatternMatchResult
86+
matchAndRewrite(ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
7387
ConversionPatternRewriter &rewriter) const override;
7488
};
7589

@@ -270,12 +284,16 @@ KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
270284
}
271285

272286
//===----------------------------------------------------------------------===//
273-
// ModuleOp with gpu.module.
287+
// ModuleOp with gpu.kernel_module.
274288
//===----------------------------------------------------------------------===//
275289

276-
PatternMatchResult GPUModuleConversion::matchAndRewrite(
277-
gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
290+
PatternMatchResult KernelModuleConversion::matchAndRewrite(
291+
ModuleOp moduleOp, ArrayRef<Value> operands,
278292
ConversionPatternRewriter &rewriter) const {
293+
if (!moduleOp.getAttrOfType<UnitAttr>(
294+
gpu::GPUDialect::getKernelModuleAttrName())) {
295+
return matchFailure();
296+
}
279297
// TODO : Generalize this to account for different extensions,
280298
// capabilities, extended_instruction_sets, other addressing models
281299
// and memory models.
@@ -284,8 +302,8 @@ PatternMatchResult GPUModuleConversion::matchAndRewrite(
284302
spirv::MemoryModel::GLSL450, spirv::Capability::Shader,
285303
spirv::Extension::SPV_KHR_storage_buffer_storage_class);
286304
// Move the region from the module op into the SPIR-V module.
287-
Region &spvModuleRegion = spvModule.body();
288-
rewriter.inlineRegionBefore(moduleOp.body(), spvModuleRegion,
305+
Region &spvModuleRegion = spvModule.getOperation()->getRegion(0);
306+
rewriter.inlineRegionBefore(moduleOp.getBodyRegion(), spvModuleRegion,
289307
spvModuleRegion.begin());
290308
// The spv.module build method adds a block with a terminator. Remove that
291309
// block. The terminator of the module op in the remaining block will be
@@ -295,6 +313,17 @@ PatternMatchResult GPUModuleConversion::matchAndRewrite(
295313
return matchSuccess();
296314
}
297315

316+
//===----------------------------------------------------------------------===//
317+
// ModuleTerminatorOp for gpu.kernel_module.
318+
//===----------------------------------------------------------------------===//
319+
320+
PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite(
321+
ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
322+
ConversionPatternRewriter &rewriter) const {
323+
rewriter.replaceOpWithNewOp<spirv::ModuleEndOp>(terminatorOp);
324+
return matchSuccess();
325+
}
326+
298327
//===----------------------------------------------------------------------===//
299328
// GPU return inside kernel functions to SPIR-V return.
300329
//===----------------------------------------------------------------------===//
@@ -313,18 +342,14 @@ PatternMatchResult GPUReturnOpConversion::matchAndRewrite(
313342
// GPU To SPIRV Patterns.
314343
//===----------------------------------------------------------------------===//
315344

316-
namespace {
317-
#include "GPUToSPIRV.cpp.inc"
318-
}
319-
320345
void mlir::populateGPUToSPIRVPatterns(MLIRContext *context,
321346
SPIRVTypeConverter &typeConverter,
322347
OwningRewritePatternList &patterns,
323348
ArrayRef<int64_t> workGroupSize) {
324-
populateWithGenerated(context, &patterns);
325349
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
326350
patterns.insert<
327-
GPUReturnOpConversion, ForOpConversion, GPUModuleConversion,
351+
GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
352+
KernelModuleTerminatorConversion,
328353
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
329354
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
330355
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,

0 commit comments

Comments
 (0)