diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h new file mode 100644 index 0000000000000..694a7fa854576 --- /dev/null +++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h @@ -0,0 +1,64 @@ +//===- CBuffer.h - HLSL constant buffer handling ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file contains utilities to work with constant buffers in HLSL. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FRONTEND_HLSL_CBUFFER_H +#define LLVM_FRONTEND_HLSL_CBUFFER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include + +namespace llvm { +class Module; +class GlobalVariable; +class NamedMDNode; + +namespace hlsl { + +struct CBufferMember { + GlobalVariable *GV; + size_t Offset; + + CBufferMember(GlobalVariable *GV, size_t Offset) : GV(GV), Offset(Offset) {} +}; + +struct CBufferMapping { + GlobalVariable *Handle; + SmallVector Members; + + CBufferMapping(GlobalVariable *Handle) : Handle(Handle) {} +}; + +class CBufferMetadata { + NamedMDNode *MD; + SmallVector Mappings; + + CBufferMetadata(NamedMDNode *MD) : MD(MD) {} + +public: + static std::optional get(Module &M); + + using iterator = SmallVector::iterator; + iterator begin() { return Mappings.begin(); } + iterator end() { return Mappings.end(); } + + void eraseFromModule(); +}; + +APInt translateCBufArrayOffset(const DataLayout &DL, APInt Offset, + ArrayType *Ty); + +} // namespace hlsl +} // namespace llvm + +#endif // LLVM_FRONTEND_HLSL_CBUFFER_H diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp new file mode 100644 index 0000000000000..37c0d912e09ee --- /dev/null +++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp @@ -0,0 +1,71 @@ +//===- CBuffer.cpp - HLSL constant buffer handling ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Frontend/HLSL/CBuffer.h" +#include "llvm/Frontend/HLSL/HLSLResource.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" + +using namespace llvm; +using namespace llvm::hlsl; + +static size_t getMemberOffset(GlobalVariable *Handle, size_t Index) { + auto *HandleTy = cast(Handle->getValueType()); + assert(HandleTy->getName().ends_with(".CBuffer") && "Not a cbuffer type"); + assert(HandleTy->getNumTypeParameters() == 1 && "Expected layout type"); + + auto *LayoutTy = cast(HandleTy->getTypeParameter(0)); + assert(LayoutTy->getName().ends_with(".Layout") && "Not a layout type"); + + // Skip the "size" parameter. + size_t ParamIndex = Index + 1; + assert(LayoutTy->getNumIntParameters() > ParamIndex && + "Not enough parameters"); + + return LayoutTy->getIntParameter(ParamIndex); +} + +std::optional CBufferMetadata::get(Module &M) { + NamedMDNode *CBufMD = M.getNamedMetadata("hlsl.cbs"); + if (!CBufMD) + return std::nullopt; + + std::optional Result({CBufMD}); + + for (const MDNode *MD : CBufMD->operands()) { + assert(MD->getNumOperands() && "Invalid cbuffer metadata"); + + auto *Handle = cast( + cast(MD->getOperand(0))->getValue()); + CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle); + + for (int I = 1, E = MD->getNumOperands(); I < E; ++I) { + Metadata *OpMD = MD->getOperand(I); + // Some members may be null if they've been optimized out. + if (!OpMD) + continue; + auto *V = cast(cast(OpMD)->getValue()); + Mapping.Members.emplace_back(V, getMemberOffset(Handle, I - 1)); + } + } + + return Result; +} + +void CBufferMetadata::eraseFromModule() { + // Remove the cbs named metadata + MD->eraseFromParent(); +} + +APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset, + ArrayType *Ty) { + int64_t TypeSize = DL.getTypeSizeInBits(Ty->getElementType()) / 8; + int64_t RoundUp = alignTo(TypeSize, Align(CBufferRowSizeInBytes)); + return Offset.udiv(TypeSize) * RoundUp; +} diff --git a/llvm/lib/Frontend/HLSL/CMakeLists.txt b/llvm/lib/Frontend/HLSL/CMakeLists.txt index eda6cb8e69a49..07a0c845ceef6 100644 --- a/llvm/lib/Frontend/HLSL/CMakeLists.txt +++ b/llvm/lib/Frontend/HLSL/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMFrontendHLSL + CBuffer.cpp HLSLResource.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt index 13f8adbe4f132..c55028bc75dd6 100644 --- a/llvm/lib/Target/DirectX/CMakeLists.txt +++ b/llvm/lib/Target/DirectX/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(DirectXCodeGen DirectXTargetMachine.cpp DirectXTargetTransformInfo.cpp DXContainerGlobals.cpp + DXILCBufferAccess.cpp DXILDataScalarization.cpp DXILFinalizeLinkage.cpp DXILFlattenArrays.cpp diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp new file mode 100644 index 0000000000000..7559f61b4cfb9 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp @@ -0,0 +1,210 @@ +//===- DXILCBufferAccess.cpp - Translate CBuffer Loads --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DXILCBufferAccess.h" +#include "DirectX.h" +#include "llvm/Frontend/HLSL/CBuffer.h" +#include "llvm/Frontend/HLSL/HLSLResource.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Local.h" + +#define DEBUG_TYPE "dxil-cbuffer-access" +using namespace llvm; + +namespace { +/// Helper for building a `load.cbufferrow` intrinsic given a simple type. +struct CBufferRowIntrin { + Intrinsic::ID IID; + Type *RetTy; + unsigned int EltSize; + unsigned int NumElts; + + CBufferRowIntrin(const DataLayout &DL, Type *Ty) { + assert(Ty == Ty->getScalarType() && "Expected scalar type"); + + switch (DL.getTypeSizeInBits(Ty)) { + case 16: + IID = Intrinsic::dx_resource_load_cbufferrow_8; + RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); + EltSize = 2; + NumElts = 8; + break; + case 32: + IID = Intrinsic::dx_resource_load_cbufferrow_4; + RetTy = StructType::get(Ty, Ty, Ty, Ty); + EltSize = 4; + NumElts = 4; + break; + case 64: + IID = Intrinsic::dx_resource_load_cbufferrow_2; + RetTy = StructType::get(Ty, Ty); + EltSize = 8; + NumElts = 2; + break; + default: + llvm_unreachable("Only 16, 32, and 64 bit types supported"); + } + } +}; +} // namespace + +static size_t getOffsetForCBufferGEP(GEPOperator *GEP, GlobalVariable *Global, + const DataLayout &DL) { + // Since we should always have a constant offset, we should only ever have a + // single GEP of indirection from the Global. + assert(GEP->getPointerOperand() == Global && + "Indirect access to resource handle"); + + APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); + bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset); + (void)Success; + assert(Success && "Offsets into cbuffer globals must be constant"); + + if (auto *ATy = dyn_cast(Global->getValueType())) + ConstantOffset = hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy); + + return ConstantOffset.getZExtValue(); +} + +/// Replace access via cbuffer global with a load from the cbuffer handle +/// itself. +static void replaceAccess(LoadInst *LI, GlobalVariable *Global, + GlobalVariable *HandleGV, size_t BaseOffset, + SmallVectorImpl &DeadInsts) { + const DataLayout &DL = HandleGV->getDataLayout(); + + size_t Offset = BaseOffset; + if (auto *GEP = dyn_cast(LI->getPointerOperand())) + Offset += getOffsetForCBufferGEP(GEP, Global, DL); + else if (LI->getPointerOperand() != Global) + llvm_unreachable("Load instruction doesn't reference cbuffer global"); + + IRBuilder<> Builder(LI); + auto *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV, + HandleGV->getName()); + + Type *Ty = LI->getType(); + CBufferRowIntrin Intrin(DL, Ty->getScalarType()); + // The cbuffer consists of some number of 16-byte rows. + unsigned int CurrentRow = Offset / hlsl::CBufferRowSizeInBytes; + unsigned int CurrentIndex = + (Offset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; + + auto *CBufLoad = Builder.CreateIntrinsic( + Intrin.RetTy, Intrin.IID, + {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr, + LI->getName()); + auto *Elt = + Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, LI->getName()); + + Value *Result = nullptr; + unsigned int Remaining = + ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; + if (Remaining == 0) { + // We only have a single element, so we're done. + Result = Elt; + + // However, if we loaded a <1 x T>, then we need to adjust the type here. + if (auto *VT = dyn_cast(LI->getType())) { + assert(VT->getNumElements() == 1 && "Can't have multiple elements here"); + Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, + Builder.getInt32(0)); + } + } else { + // Walk each element and extract it, wrapping to new rows as needed. + SmallVector Extracts{Elt}; + while (Remaining--) { + CurrentIndex %= Intrin.NumElts; + + if (CurrentIndex == 0) + CBufLoad = Builder.CreateIntrinsic( + Intrin.RetTy, Intrin.IID, + {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)}, + nullptr, LI->getName()); + + Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, + LI->getName())); + } + + // Finally, we build up the original loaded value. + Result = PoisonValue::get(Ty); + for (int I = 0, E = Extracts.size(); I < E; ++I) + Result = + Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I)); + } + + LI->replaceAllUsesWith(Result); + DeadInsts.push_back(LI); +} + +static void replaceAccessesWithHandle(GlobalVariable *Global, + GlobalVariable *HandleGV, + size_t BaseOffset) { + SmallVector DeadInsts; + + SmallVector ToProcess{Global->users()}; + while (!ToProcess.empty()) { + User *Cur = ToProcess.pop_back_val(); + + // If we have a load instruction, replace the access. + if (auto *LI = dyn_cast(Cur)) { + replaceAccess(LI, Global, HandleGV, BaseOffset, DeadInsts); + continue; + } + + // Otherwise, walk users looking for a load... + ToProcess.append(Cur->user_begin(), Cur->user_end()); + } + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); +} + +static bool replaceCBufferAccesses(Module &M) { + std::optional CBufMD = hlsl::CBufferMetadata::get(M); + if (!CBufMD) + return false; + + for (const hlsl::CBufferMapping &Mapping : *CBufMD) + for (const hlsl::CBufferMember &Member : Mapping.Members) { + replaceAccessesWithHandle(Member.GV, Mapping.Handle, Member.Offset); + Member.GV->removeFromParent(); + } + + CBufMD->eraseFromModule(); + return true; +} + +PreservedAnalyses DXILCBufferAccess::run(Module &M, ModuleAnalysisManager &AM) { + PreservedAnalyses PA; + bool Changed = replaceCBufferAccesses(M); + + if (!Changed) + return PreservedAnalyses::all(); + return PA; +} + +namespace { +class DXILCBufferAccessLegacy : public ModulePass { +public: + bool runOnModule(Module &M) override { return replaceCBufferAccesses(M); } + StringRef getPassName() const override { return "DXIL CBuffer Access"; } + DXILCBufferAccessLegacy() : ModulePass(ID) {} + + static char ID; // Pass identification. +}; +char DXILCBufferAccessLegacy::ID = 0; +} // end anonymous namespace + +INITIALIZE_PASS(DXILCBufferAccessLegacy, DEBUG_TYPE, "DXIL CBuffer Access", + false, false) + +ModulePass *llvm::createDXILCBufferAccessLegacyPass() { + return new DXILCBufferAccessLegacy(); +} diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.h b/llvm/lib/Target/DirectX/DXILCBufferAccess.h new file mode 100644 index 0000000000000..6c1cde164004e --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.h @@ -0,0 +1,28 @@ +//===- DXILCBufferAccess.h - Translate CBuffer Loads ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file Pass for replacing loads from cbuffers in the cbuffer address space to +// cbuffer load intrinsics. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_DIRECTX_DXILCBUFFERACCESS_H +#define LLVM_LIB_TARGET_DIRECTX_DXILCBUFFERACCESS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class DXILCBufferAccess : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_DIRECTX_DXILCBUFFERACCESS_H diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h index 96a8a08c875f8..c0eb221d12203 100644 --- a/llvm/lib/Target/DirectX/DirectX.h +++ b/llvm/lib/Target/DirectX/DirectX.h @@ -35,6 +35,12 @@ void initializeDXILIntrinsicExpansionLegacyPass(PassRegistry &); /// Pass to expand intrinsic operations that lack DXIL opCodes ModulePass *createDXILIntrinsicExpansionLegacyPass(); +/// Initializer for DXIL CBuffer Access Pass +void initializeDXILCBufferAccessLegacyPass(PassRegistry &); + +/// Pass to translate loads in the cbuffer address space to intrinsics +ModulePass *createDXILCBufferAccessLegacyPass(); + /// Initializer for DXIL Data Scalarization Pass void initializeDXILDataScalarizationLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/DirectX/DirectXPassRegistry.def b/llvm/lib/Target/DirectX/DirectXPassRegistry.def index 87d91ead1896f..37093f16680a9 100644 --- a/llvm/lib/Target/DirectX/DirectXPassRegistry.def +++ b/llvm/lib/Target/DirectX/DirectXPassRegistry.def @@ -23,6 +23,7 @@ MODULE_ANALYSIS("dxil-root-signature-analysis", dxil::RootSignatureAnalysis()) #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif +MODULE_PASS("dxil-cbuffer-access", DXILCBufferAccess()) MODULE_PASS("dxil-data-scalarization", DXILDataScalarization()) MODULE_PASS("dxil-flatten-arrays", DXILFlattenArrays()) MODULE_PASS("dxil-intrinsic-expansion", DXILIntrinsicExpansion()) diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index 747e4b3eb9411..41f6f37a41f9d 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "DirectXTargetMachine.h" +#include "DXILCBufferAccess.h" #include "DXILDataScalarization.h" #include "DXILFlattenArrays.h" #include "DXILIntrinsicExpansion.h" @@ -65,6 +66,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { initializeRootSignatureAnalysisWrapperPass(*PR); initializeDXILFinalizeLinkageLegacyPass(*PR); initializeDXILPrettyPrinterLegacyPass(*PR); + initializeDXILCBufferAccessLegacyPass(*PR); } class DXILTargetObjectFile : public TargetLoweringObjectFile { @@ -96,6 +98,7 @@ class DirectXPassConfig : public TargetPassConfig { void addCodeGenPrepare() override { addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createDXILIntrinsicExpansionLegacyPass()); + addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILDataScalarizationLegacyPass()); addPass(createDXILFlattenArraysLegacyPass()); addPass(createDXILResourceAccessLegacyPass()); diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll new file mode 100644 index 0000000000000..dbd01b323aa2a --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s + +; cbuffer CB : register(b0) { +; float a1[3]; +; } +%__cblayout_CB = type <{ [3 x float] }> + +@CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison +; CHECK: @CB.cb = +; CHECK-NOT: external {{.*}} addrspace(2) global +@a1 = external addrspace(2) global [3 x float], align 4 + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_36_0tt(i32 0, i32 0, i32 1, i32 0, i1 false) + store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) %CB.cb_h, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %a1 = load float, ptr addrspace(2) getelementptr inbounds ([3 x float], ptr addrspace(2) @a1, i32 0, i32 1), align 4 + store float %a1, ptr %dst, align 32 + + ret void +} + +; CHECK-NOT: !hlsl.cbs = +!hlsl.cbs = !{!0} + +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll new file mode 100644 index 0000000000000..42d7943953b84 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll @@ -0,0 +1,124 @@ +; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s + +; cbuffer CB : register(b0) { +; float a1[3]; +; double3 a2[2]; +; float16_t a3[2][2]; +; uint64_t a4[3]; +; int4 a5[2][3][4]; +; uint16_t a6[1]; +; int64_t a7[2]; +; bool a8[4]; +; } +%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison +; CHECK: @CB.cb = +; CHECK-NOT: external {{.*}} addrspace(2) global +@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 +@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32 +@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2 +@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8 +@a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16 +@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2 +@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8 +@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4 + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) + store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + store float %a1, ptr %dst, align 32 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) + ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) + ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 + ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] + %a2 = load <3 x double>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a2, i32 32), align 8 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 + store <3 x double> %a2, ptr %a2.i, align 32 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8) + ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32 + ; CHECK: store half [[X]], ptr [[PTR]] + %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 6), align 2 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32 + store half %a3, ptr %a3.i, align 2 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 + ; CHECK: store i64 [[X]], ptr [[PTR]] + %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 8), align 8 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 + store i64 %a4, ptr %a4.i, align 8 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 26) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 + ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 + ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] + %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 272), align 4 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 + store <4 x i32> %a5, ptr %a5.i, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 38) + ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64 + ; CHECK: store i16 [[X]], ptr [[PTR]] + %a6 = load i16, ptr addrspace(2) @a6, align 2 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64 + store i16 %a6, ptr %a6.i, align 2 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 40) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 + ; CHECK: store i64 [[X]], ptr [[PTR]] + %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 8), align 8 + %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 + store i64 %a7, ptr %a7.i, align 8 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 42) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80 + ; CHECK: store i32 [[X]], ptr [[PTR]] + %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 4), align 4, !range !1, !noundef !2 + %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80 + store i32 %a8, ptr %a8.i, align 4 + + ret void +} + +; CHECK-NOT: !hlsl.cbs = +!hlsl.cbs = !{!0} + +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8} +!1 = !{i32 0, i32 2} +!2 = !{} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll new file mode 100644 index 0000000000000..d7272b449166d --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll @@ -0,0 +1,25 @@ +; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s + +%__cblayout_CB = type <{ float }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison +; CHECK: @CB.cb = +; CHECK-NOT: external {{.*}} addrspace(2) global +@x = external local_unnamed_addr addrspace(2) global float, align 4 + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %x = load float, ptr addrspace(2) @x, align 4 + store float %x, ptr %dst, align 4 + ret void +} + +; CHECK-NOT: !hlsl.cbs = +!hlsl.cbs = !{!0} + +!0 = !{ptr @CB.cb, ptr addrspace(2) @x} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll new file mode 100644 index 0000000000000..abe087dbe6100 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll @@ -0,0 +1,36 @@ +; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s + +; cbuffer CB : register(b0) { +; float a1[3]; +; } +%__cblayout_CB = type <{ [3 x float] }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison +; CHECK: @CB.cb = +; CHECK-NOT: external {{.*}} addrspace(2) global +@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + store float %a1, ptr %dst, align 32 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[X]], ptr %dst + %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + store float %a2, ptr %dst, align 32 + + ret void +} + +; CHECK-NOT: !hlsl.cbs = +!hlsl.cbs = !{!0} + +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll new file mode 100644 index 0000000000000..125d6b66c0107 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll @@ -0,0 +1,100 @@ +; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s + +; cbuffer CB { +; float a1; // offset 0, size 4 +; int a2; // offset 4, size 4 +; bool a3; // offset 8, size 4 +; float16_t a4; // offset 12, size 2 +; uint16_t a5; // offset 14, size 2 +; double a6; // offset 16, size 8 +; int64_t a7; // offset 24, size 8 +; } +%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison +; CHECK: @CB.cb = +; CHECK-NOT: external {{.*}} addrspace(2) global +@a1 = external local_unnamed_addr addrspace(2) global float, align 4 +@a2 = external local_unnamed_addr addrspace(2) global i32, align 4 +@a3 = external local_unnamed_addr addrspace(2) global i32, align 4 +@a4 = external local_unnamed_addr addrspace(2) global half, align 2 +@a5 = external local_unnamed_addr addrspace(2) global i16, align 2 +@a6 = external local_unnamed_addr addrspace(2) global double, align 8 +@a7 = external local_unnamed_addr addrspace(2) global i64, align 8 + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) + store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: store float [[A1]], ptr %dst + %a1 = load float, ptr addrspace(2) @a1, align 4 + store float %a1, ptr %dst, align 8 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 + ; CHECK: store i32 [[A2]], ptr [[PTR]] + %a2 = load i32, ptr addrspace(2) @a2, align 4 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 + store i32 %a2, ptr %a2.i, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 + ; CHECK: store i32 [[A3]], ptr [[PTR]] + %a3 = load i32, ptr addrspace(2) @a3, align 4, !range !1, !noundef !2 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 + store i32 %a3, ptr %a3.i, align 8 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12 + ; CHECK: store half [[A4]], ptr [[PTR]] + %a4 = load half, ptr addrspace(2) @a4, align 2 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12 + store half %a4, ptr %a4.i, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14 + ; CHECK: store i16 [[A5]], ptr [[PTR]] + %a5 = load i16, ptr addrspace(2) @a5, align 2 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14 + store i16 %a5, ptr %a5.i, align 2 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 + ; CHECK: store double [[A6]], ptr [[PTR]] + %a6 = load double, ptr addrspace(2) @a6, align 8 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 + store double %a6, ptr %a6.i, align 8 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24 + ; CHECK: store i64 [[A7]], ptr [[PTR]] + %a7 = load i64, ptr addrspace(2) @a7, align 8 + %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24 + store i64 %a7, ptr %a7.i, align 8 + + ret void +} + +; CHECK-NOT: !hlsl.cbs = +!hlsl.cbs = !{!0} + +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7} +!1 = !{i32 0, i32 2} +!2 = !{} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll new file mode 100644 index 0000000000000..6addf7482ac37 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll @@ -0,0 +1,119 @@ +; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s + +; cbuffer CB { +; float3 a1; // offset 0, size 12 (+4) +; double3 a2; // offset 16, size 24 +; float16_t2 a3; // offset 40, size 4 (+4) +; uint64_t3 a4; // offset 48, size 24 (+8) +; int4 a5; // offset 80, size 16 +; uint16_t3 a6; // offset 96, size 6 (+10) +; }; +%__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }> + +@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) poison +; CHECK: @CB.cb = +; CHECK-NOT: external {{.*}} addrspace(2) global +@a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16 +@a2 = external local_unnamed_addr addrspace(2) global <3 x double>, align 32 +@a3 = external local_unnamed_addr addrspace(2) global <2 x half>, align 4 +@a4 = external local_unnamed_addr addrspace(2) global <3 x i64>, align 32 +@a5 = external local_unnamed_addr addrspace(2) global <4 x i32>, align 16 +@a6 = external local_unnamed_addr addrspace(2) global <3 x i16>, align 8 + +; CHECK: define void @f +define void @f(ptr %dst) { +entry: + %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) + store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) + ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x float> poison, float [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2 + ; CHECK: store <3 x float> [[VEC2]], ptr %dst + %a1 = load <3 x float>, ptr addrspace(2) @a1, align 16 + store <3 x float> %a1, ptr %dst, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) + ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 + ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) + ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 + ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] + %a2 = load <3 x double>, ptr addrspace(2) @a2, align 32 + %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 + store <3 x double> %a2, ptr %a2.i, align 8 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) + ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4 + ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5 + ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 + ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]] + %a3 = load <2 x half>, ptr addrspace(2) @a3, align 4 + %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 + store <2 x half> %a3, ptr %a3.i, align 2 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3) + ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 + ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4) + ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 + ; CHECK: store <3 x i64> [[VEC2]], ptr [[PTR]] + %a4 = load <3 x i64>, ptr addrspace(2) @a4, align 32 + %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 + store <3 x i64> %a4, ptr %a4.i, align 8 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) + ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 + ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 + ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 + ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 + ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] + %a5 = load <4 x i32>, ptr addrspace(2) @a5, align 16 + %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 + store <4 x i32> %a5, ptr %a5.i, align 4 + + ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb + ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) + ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 + ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1 + ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2 + ; CHECK: [[VEC0:%.*]] = insertelement <3 x i16> poison, i16 [[X]], i32 0 + ; CHECK: [[VEC1:%.*]] = insertelement <3 x i16> [[VEC0]], i16 [[Y]], i32 1 + ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2 + ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88 + ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]] + %a6 = load <3 x i16>, ptr addrspace(2) @a6, align 8 + %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88 + store <3 x i16> %a6, ptr %a6.i, align 2 + + ret void +} + +; CHECK-NOT: !hlsl.cbs = +!hlsl.cbs = !{!0} + +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6} diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index ee70cec534bc5..b1bd9f16f4efa 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Finalize Linkage ; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: DXIL CBuffer Access ; CHECK-NEXT: DXIL Data Scalarization ; CHECK-NEXT: DXIL Array Flattener ; CHECK-NEXT: FunctionPass Manager