diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f7db99784..74244c1d58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -686,6 +686,8 @@ add_subdirectory(include/dxc) # really depend on anything else in the build it is safe. list(APPEND LLVM_COMMON_DEPENDS HCTGen) +add_subdirectory(utils/hct) + if(EXISTS "${LLVM_MAIN_SRC_DIR}/external") add_subdirectory(external) # SPIRV change endif() diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index f8d5b740f7..db238c99d6 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -147,12 +147,19 @@ const unsigned kMaxMSTotalSigRows = 32; const unsigned kMaxMSSMSize = 1024 * 28; const unsigned kMinWaveSize = 4; const unsigned kMaxWaveSize = 128; +const unsigned kDefaultMaxVectorLength = 4; +const unsigned kSM69MaxVectorLength = 1024; const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +/* hctdb_instrhelp.get_max_oload_dims()*/ +// OLOAD_DIMS-TEXT:BEGIN +const unsigned kDxilMaxOloadDims = 2; +// OLOAD_DIMS-TEXT:END + enum class ComponentType : uint32_t { Invalid = 0, I1, @@ -463,6 +470,11 @@ inline bool IsTBuffer(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::TBuffer; } +inline bool IsCTBuffer(DXIL::ResourceKind ResourceKind) { + return ResourceKind == DXIL::ResourceKind::CBuffer || + ResourceKind == DXIL::ResourceKind::TBuffer; +} + /// Whether the resource kind is a FeedbackTexture. 
inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) { return ResourceKind == DXIL::ResourceKind::FeedbackTexture2D || @@ -475,6 +487,9 @@ inline bool IsFeedbackTexture(DXIL::ResourceKind ResourceKind) { // Enumeration for operations specified by DXIL enum class OpCode : unsigned { // + RawBufferVectorLoad = 303, // reads from a raw buffer and structured buffer + RawBufferVectorStore = + 304, // writes to a RWByteAddressBuffer or RWStructuredBuffer Reserved0 = 226, // Reserved Reserved1 = 227, // Reserved Reserved10 = 236, // Reserved @@ -1029,8 +1044,9 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_6 = 222, NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, + NumOpCodes_Dxil_1_9 = 305, - NumOpCodes = 303 // exclusive last value of enumeration + NumOpCodes = 305 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1042,6 +1058,8 @@ enum class OpCode : unsigned { // Groups for DXIL operations with equivalent function templates enum class OpCodeClass : unsigned { // + RawBufferVectorLoad, + RawBufferVectorStore, Reserved, // Amplification shader instructions @@ -1337,8 +1355,9 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_6 = 149, NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, + NumOpClasses_Dxil_1_9 = 177, - NumOpClasses = 175 // exclusive last value of enumeration + NumOpClasses = 177 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 11ab8e3b8d..c685f64b49 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -645,6 +645,42 @@ struct LlvmInst_VAArg { bool isAllowed() const { return false; } }; +/// This instruction extracts from vector +struct LlvmInst_ExtractElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ExtractElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == 
llvm::Instruction::ExtractElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction inserts into vector +struct LlvmInst_InsertElement { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_InsertElement(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::InsertElement; + } + // Validation support + bool isAllowed() const { return true; } +}; + +/// This instruction Shuffle two vectors +struct LlvmInst_ShuffleVector { + llvm::Instruction *Instr; + // Construction and identification + LlvmInst_ShuffleVector(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return Instr->getOpcode() == llvm::Instruction::ShuffleVector; + } + // Validation support + bool isAllowed() const { return true; } +}; + /// This instruction extracts from aggregate struct LlvmInst_ExtractValue { llvm::Instruction *Instr; @@ -8813,5 +8849,98 @@ struct DxilInst_AllocateRayQuery2 { llvm::APInt(32, (uint64_t)val))); } }; + +/// This instruction reads from a raw buffer and structured buffer +struct DxilInst_RawBufferVectorLoad { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorLoad(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorLoad); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (5 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_srv = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_alignment = 4, + }; + // Accessors + llvm::Value *get_srv() const { return Instr->getOperand(1); } + void set_srv(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const
{ return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(4); } + void set_alignment(llvm::Value *val) { Instr->setOperand(4, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(4)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(4, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; + +/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer +struct DxilInst_RawBufferVectorStore { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_RawBufferVectorStore(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::RawBufferVectorStore); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_uav = 1, + arg_index = 2, + arg_elementOffset = 3, + arg_value0 = 4, + arg_alignment = 5, + }; + // Accessors + llvm::Value *get_uav() const { return Instr->getOperand(1); } + void set_uav(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_index() const { return Instr->getOperand(2); } + void set_index(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_elementOffset() const { return Instr->getOperand(3); } + void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_value0() const { return Instr->getOperand(4); } + void
set_value0(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignment() const { return Instr->getOperand(5); } + void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_alignment_val() const { + return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_alignment_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index 3514701327..955b5449db 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -58,11 +58,14 @@ class OP { void RefreshCache(); llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType); + llvm::Function *GetOpFunc(OpCode OpCode, + llvm::ArrayRef<llvm::Type *> ExtendedOverloads); const llvm::SmallMapVector<llvm::Type *, llvm::Function *, 8> & GetOpFuncList(OpCode OpCode) const; bool IsDxilOpUsed(OpCode opcode) const; void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } + llvm::Module *GetModule() { return m_pModule; } llvm::Type *GetHandleType() const; llvm::Type *GetNodeHandleType() const; llvm::Type *GetNodeRecordHandleType() const; @@ -83,6 +86,10 @@ class OP { llvm::Type *GetVectorType(unsigned numElements, llvm::Type *pOverloadType); bool IsResRetType(llvm::Type *Ty); + // Construct an unnamed struct type containing the set of member types. + llvm::StructType * + GetExtendedOverloadType(llvm::ArrayRef<llvm::Type *> OverloadTypes); + // Try to get the opcode class for a function. // Return true and set `opClass` if the given function is a dxil function. // Return false if the given function is not a dxil function. @@ -140,6 +147,8 @@ class OP { unsigned valMinor, unsigned &major, unsigned &minor, unsigned &mask); + static bool IsDxilOpExtendedOverload(OpCode C); + private: // Per-module properties.
llvm::LLVMContext &m_Ctx; @@ -164,8 +173,10 @@ class OP { static const unsigned kUserDefineTypeSlot = 9; static const unsigned kObjectTypeSlot = 10; + static const unsigned kVectorTypeSlot = 11; + static const unsigned kExtendedTypeSlot = 12; static const unsigned kNumTypeOverloads = - 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj + 13; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj, vec, extended llvm::Type *m_pResRetType[kNumTypeOverloads]; llvm::Type *m_pCBufferRetType[kNumTypeOverloads]; @@ -179,14 +190,39 @@ class OP { private: // Static properties. + struct OverloadMask { + // mask of type slot bits as (1 << TypeSlot) + uint16_t SlotMask; + static_assert(kNumTypeOverloads <= (sizeof(SlotMask) * 8)); + bool operator[](unsigned TypeSlot) const { + return (TypeSlot < kNumTypeOverloads) ? (bool)(SlotMask & (1 << TypeSlot)) + : 0; + } + operator bool() const { return SlotMask != 0; } + }; struct OpCodeProperty { OpCode opCode; const char *pOpCodeName; OpCodeClass opCodeClass; const char *pOpCodeClassName; bool bAllowOverload[kNumTypeOverloads]; // void, h,f,d, i1, i8,i16,i32,i64, - // udt + // udt, obj, vec, extended llvm::Attribute::AttrKind FuncAttr; + + // Extended Type Overloads: + // This is an encoding for a multi-dimensional overload. + // 1. Only bAllowOverload[kExtendedTypeSlot] is set to true + // 2. ExtendedOverloads defines allowed types for each overload index + // 3. AllowedVectorElements defines allowed vector component types, + // when kVectorTypeSlot bit is set for the corresponding overload index. + // This includes when a single vector overload type is specified with + // bAllowOverload[kVectorTypeSlot]. + + // A bit mask of allowed type slots per extended overload + OverloadMask ExtendedOverloads[DXIL::kDxilMaxOloadDims]; + // A bit mask of allowed vector element types for the vector overload + // or each corresponding extended vector overload. 
+ OverloadMask AllowedVectorElements[DXIL::kDxilMaxOloadDims]; }; static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes]; diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index fcc9bb11b1..41c72d1a51 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -5,378 +5,366 @@ #pragma once namespace hlsl { enum class IntrinsicOp { - IOP_AcceptHitAndEndSearch, - IOP_AddUint64, - IOP_AllMemoryBarrier, - IOP_AllMemoryBarrierWithGroupSync, - IOP_AllocateRayQuery, - IOP_Barrier, - IOP_CallShader, - IOP_CheckAccessFullyMapped, - IOP_CreateResourceFromHeap, - IOP_D3DCOLORtoUBYTE4, - IOP_DeviceMemoryBarrier, - IOP_DeviceMemoryBarrierWithGroupSync, - IOP_DispatchMesh, - IOP_DispatchRaysDimensions, - IOP_DispatchRaysIndex, - IOP_EvaluateAttributeAtSample, - IOP_EvaluateAttributeCentroid, - IOP_EvaluateAttributeSnapped, - IOP_GeometryIndex, - IOP_GetAttributeAtVertex, - IOP_GetRemainingRecursionLevels, - IOP_GetRenderTargetSampleCount, - IOP_GetRenderTargetSamplePosition, - IOP_GroupMemoryBarrier, - IOP_GroupMemoryBarrierWithGroupSync, - IOP_HitKind, - IOP_IgnoreHit, - IOP_InstanceID, - IOP_InstanceIndex, - IOP_InterlockedAdd, - IOP_InterlockedAnd, - IOP_InterlockedCompareExchange, - IOP_InterlockedCompareExchangeFloatBitwise, - IOP_InterlockedCompareStore, - IOP_InterlockedCompareStoreFloatBitwise, - IOP_InterlockedExchange, - IOP_InterlockedMax, - IOP_InterlockedMin, - IOP_InterlockedOr, - IOP_InterlockedXor, - IOP_IsHelperLane, - IOP_NonUniformResourceIndex, - IOP_ObjectRayDirection, - IOP_ObjectRayOrigin, - IOP_ObjectToWorld, - IOP_ObjectToWorld3x4, - IOP_ObjectToWorld4x3, - IOP_PrimitiveIndex, - IOP_Process2DQuadTessFactorsAvg, - IOP_Process2DQuadTessFactorsMax, - IOP_Process2DQuadTessFactorsMin, - IOP_ProcessIsolineTessFactors, - IOP_ProcessQuadTessFactorsAvg, - IOP_ProcessQuadTessFactorsMax, - IOP_ProcessQuadTessFactorsMin, - IOP_ProcessTriTessFactorsAvg, - IOP_ProcessTriTessFactorsMax, - 
IOP_ProcessTriTessFactorsMin, - IOP_QuadAll, - IOP_QuadAny, - IOP_QuadReadAcrossDiagonal, - IOP_QuadReadAcrossX, - IOP_QuadReadAcrossY, - IOP_QuadReadLaneAt, - IOP_RayFlags, - IOP_RayTCurrent, - IOP_RayTMin, - IOP_ReportHit, - IOP_SetMeshOutputCounts, - IOP_TraceRay, - IOP_WaveActiveAllEqual, - IOP_WaveActiveAllTrue, - IOP_WaveActiveAnyTrue, - IOP_WaveActiveBallot, - IOP_WaveActiveBitAnd, - IOP_WaveActiveBitOr, - IOP_WaveActiveBitXor, - IOP_WaveActiveCountBits, - IOP_WaveActiveMax, - IOP_WaveActiveMin, - IOP_WaveActiveProduct, - IOP_WaveActiveSum, - IOP_WaveGetLaneCount, - IOP_WaveGetLaneIndex, - IOP_WaveIsFirstLane, - IOP_WaveMatch, - IOP_WaveMultiPrefixBitAnd, - IOP_WaveMultiPrefixBitOr, - IOP_WaveMultiPrefixBitXor, - IOP_WaveMultiPrefixCountBits, - IOP_WaveMultiPrefixProduct, - IOP_WaveMultiPrefixSum, - IOP_WavePrefixCountBits, - IOP_WavePrefixProduct, - IOP_WavePrefixSum, - IOP_WaveReadLaneAt, - IOP_WaveReadLaneFirst, - IOP_WorldRayDirection, - IOP_WorldRayOrigin, - IOP_WorldToObject, - IOP_WorldToObject3x4, - IOP_WorldToObject4x3, - IOP_abort, - IOP_abs, - IOP_acos, - IOP_all, - IOP_and, - IOP_any, - IOP_asdouble, - IOP_asfloat, - IOP_asfloat16, - IOP_asin, - IOP_asint, - IOP_asint16, - IOP_asuint, - IOP_asuint16, - IOP_atan, - IOP_atan2, - IOP_ceil, - IOP_clamp, - IOP_clip, - IOP_cos, - IOP_cosh, - IOP_countbits, - IOP_cross, - IOP_ddx, - IOP_ddx_coarse, - IOP_ddx_fine, - IOP_ddy, - IOP_ddy_coarse, - IOP_ddy_fine, - IOP_degrees, - IOP_determinant, - IOP_distance, - IOP_dot, - IOP_dot2add, - IOP_dot4add_i8packed, - IOP_dot4add_u8packed, - IOP_dst, - IOP_exp, - IOP_exp2, - IOP_f16tof32, - IOP_f32tof16, - IOP_faceforward, - IOP_firstbithigh, - IOP_firstbitlow, - IOP_floor, - IOP_fma, - IOP_fmod, - IOP_frac, - IOP_frexp, - IOP_fwidth, - IOP_isfinite, - IOP_isinf, - IOP_isnan, - IOP_ldexp, - IOP_length, - IOP_lerp, - IOP_lit, - IOP_log, - IOP_log10, - IOP_log2, - IOP_mad, - IOP_max, - IOP_min, - IOP_modf, - IOP_msad4, - IOP_mul, - IOP_normalize, - IOP_or, - 
IOP_pack_clamp_s8, - IOP_pack_clamp_u8, - IOP_pack_s8, - IOP_pack_u8, - IOP_pow, - IOP_printf, - IOP_radians, - IOP_rcp, - IOP_reflect, - IOP_refract, - IOP_reversebits, - IOP_round, - IOP_rsqrt, - IOP_saturate, - IOP_select, - IOP_sign, - IOP_sin, - IOP_sincos, - IOP_sinh, - IOP_smoothstep, - IOP_source_mark, - IOP_sqrt, - IOP_step, - IOP_tan, - IOP_tanh, - IOP_tex1D, - IOP_tex1Dbias, - IOP_tex1Dgrad, - IOP_tex1Dlod, - IOP_tex1Dproj, - IOP_tex2D, - IOP_tex2Dbias, - IOP_tex2Dgrad, - IOP_tex2Dlod, - IOP_tex2Dproj, - IOP_tex3D, - IOP_tex3Dbias, - IOP_tex3Dgrad, - IOP_tex3Dlod, - IOP_tex3Dproj, - IOP_texCUBE, - IOP_texCUBEbias, - IOP_texCUBEgrad, - IOP_texCUBElod, - IOP_texCUBEproj, - IOP_transpose, - IOP_trunc, - IOP_unpack_s8s16, - IOP_unpack_s8s32, - IOP_unpack_u8u16, - IOP_unpack_u8u32, -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferLoad, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkRawBufferStore, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_VkReadClock, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode, -#endif // ENABLE_SPIRV_CODEGEN -#ifdef ENABLE_SPIRV_CODEGEN - IOP_Vkext_execution_mode_id, -#endif // ENABLE_SPIRV_CODEGEN - MOP_Append, - MOP_RestartStrip, - MOP_CalculateLevelOfDetail, - MOP_CalculateLevelOfDetailUnclamped, - MOP_GetDimensions, - MOP_Load, - MOP_Sample, - MOP_SampleBias, - MOP_SampleCmp, - MOP_SampleCmpBias, - MOP_SampleCmpGrad, - MOP_SampleCmpLevel, - MOP_SampleCmpLevelZero, - MOP_SampleGrad, - MOP_SampleLevel, - MOP_Gather, - MOP_GatherAlpha, - MOP_GatherBlue, - MOP_GatherCmp, - MOP_GatherCmpAlpha, - MOP_GatherCmpBlue, - MOP_GatherCmpGreen, - MOP_GatherCmpRed, - MOP_GatherGreen, - MOP_GatherRaw, - MOP_GatherRed, - MOP_GetSamplePosition, - MOP_Load2, - MOP_Load3, - MOP_Load4, - MOP_InterlockedAdd, - MOP_InterlockedAdd64, - MOP_InterlockedAnd, - MOP_InterlockedAnd64, - MOP_InterlockedCompareExchange, - MOP_InterlockedCompareExchange64, - 
MOP_InterlockedCompareExchangeFloatBitwise, - MOP_InterlockedCompareStore, - MOP_InterlockedCompareStore64, - MOP_InterlockedCompareStoreFloatBitwise, - MOP_InterlockedExchange, - MOP_InterlockedExchange64, - MOP_InterlockedExchangeFloat, - MOP_InterlockedMax, - MOP_InterlockedMax64, - MOP_InterlockedMin, - MOP_InterlockedMin64, - MOP_InterlockedOr, - MOP_InterlockedOr64, - MOP_InterlockedXor, - MOP_InterlockedXor64, - MOP_Store, - MOP_Store2, - MOP_Store3, - MOP_Store4, - MOP_DecrementCounter, - MOP_IncrementCounter, - MOP_Consume, - MOP_WriteSamplerFeedback, - MOP_WriteSamplerFeedbackBias, - MOP_WriteSamplerFeedbackGrad, - MOP_WriteSamplerFeedbackLevel, - MOP_Abort, - MOP_CandidateGeometryIndex, - MOP_CandidateInstanceContributionToHitGroupIndex, - MOP_CandidateInstanceID, - MOP_CandidateInstanceIndex, - MOP_CandidateObjectRayDirection, - MOP_CandidateObjectRayOrigin, - MOP_CandidateObjectToWorld3x4, - MOP_CandidateObjectToWorld4x3, - MOP_CandidatePrimitiveIndex, - MOP_CandidateProceduralPrimitiveNonOpaque, - MOP_CandidateTriangleBarycentrics, - MOP_CandidateTriangleFrontFace, - MOP_CandidateTriangleRayT, - MOP_CandidateType, - MOP_CandidateWorldToObject3x4, - MOP_CandidateWorldToObject4x3, - MOP_CommitNonOpaqueTriangleHit, - MOP_CommitProceduralPrimitiveHit, - MOP_CommittedGeometryIndex, - MOP_CommittedInstanceContributionToHitGroupIndex, - MOP_CommittedInstanceID, - MOP_CommittedInstanceIndex, - MOP_CommittedObjectRayDirection, - MOP_CommittedObjectRayOrigin, - MOP_CommittedObjectToWorld3x4, - MOP_CommittedObjectToWorld4x3, - MOP_CommittedPrimitiveIndex, - MOP_CommittedRayT, - MOP_CommittedStatus, - MOP_CommittedTriangleBarycentrics, - MOP_CommittedTriangleFrontFace, - MOP_CommittedWorldToObject3x4, - MOP_CommittedWorldToObject4x3, - MOP_Proceed, - MOP_RayFlags, - MOP_RayTMin, - MOP_TraceRayInline, - MOP_WorldRayDirection, - MOP_WorldRayOrigin, - MOP_Count, - MOP_FinishedCrossGroupSharing, - MOP_GetGroupNodeOutputRecords, - MOP_GetThreadNodeOutputRecords, - 
MOP_IsValid, - MOP_GroupIncrementOutputCount, - MOP_ThreadIncrementOutputCount, - MOP_OutputComplete, -#ifdef ENABLE_SPIRV_CODEGEN - MOP_SubpassLoad, -#endif // ENABLE_SPIRV_CODEGEN + IOP_AcceptHitAndEndSearch = 0, + IOP_AddUint64 = 1, + IOP_AllMemoryBarrier = 2, + IOP_AllMemoryBarrierWithGroupSync = 3, + IOP_AllocateRayQuery = 4, + IOP_Barrier = 5, + IOP_CallShader = 6, + IOP_CheckAccessFullyMapped = 7, + IOP_CreateResourceFromHeap = 8, + IOP_D3DCOLORtoUBYTE4 = 9, + IOP_DeviceMemoryBarrier = 10, + IOP_DeviceMemoryBarrierWithGroupSync = 11, + IOP_DispatchMesh = 12, + IOP_DispatchRaysDimensions = 13, + IOP_DispatchRaysIndex = 14, + IOP_EvaluateAttributeAtSample = 15, + IOP_EvaluateAttributeCentroid = 16, + IOP_EvaluateAttributeSnapped = 17, + IOP_GeometryIndex = 18, + IOP_GetAttributeAtVertex = 19, + IOP_GetRemainingRecursionLevels = 20, + IOP_GetRenderTargetSampleCount = 21, + IOP_GetRenderTargetSamplePosition = 22, + IOP_GroupMemoryBarrier = 23, + IOP_GroupMemoryBarrierWithGroupSync = 24, + IOP_HitKind = 25, + IOP_IgnoreHit = 26, + IOP_InstanceID = 27, + IOP_InstanceIndex = 28, + IOP_InterlockedAdd = 29, + IOP_InterlockedAnd = 30, + IOP_InterlockedCompareExchange = 31, + IOP_InterlockedCompareExchangeFloatBitwise = 32, + IOP_InterlockedCompareStore = 33, + IOP_InterlockedCompareStoreFloatBitwise = 34, + IOP_InterlockedExchange = 35, + IOP_InterlockedMax = 36, + IOP_InterlockedMin = 37, + IOP_InterlockedOr = 38, + IOP_InterlockedXor = 39, + IOP_IsHelperLane = 40, + IOP_NonUniformResourceIndex = 41, + IOP_ObjectRayDirection = 42, + IOP_ObjectRayOrigin = 43, + IOP_ObjectToWorld = 44, + IOP_ObjectToWorld3x4 = 45, + IOP_ObjectToWorld4x3 = 46, + IOP_PrimitiveIndex = 47, + IOP_Process2DQuadTessFactorsAvg = 48, + IOP_Process2DQuadTessFactorsMax = 49, + IOP_Process2DQuadTessFactorsMin = 50, + IOP_ProcessIsolineTessFactors = 51, + IOP_ProcessQuadTessFactorsAvg = 52, + IOP_ProcessQuadTessFactorsMax = 53, + IOP_ProcessQuadTessFactorsMin = 54, + IOP_ProcessTriTessFactorsAvg = 
55, + IOP_ProcessTriTessFactorsMax = 56, + IOP_ProcessTriTessFactorsMin = 57, + IOP_QuadAll = 58, + IOP_QuadAny = 59, + IOP_QuadReadAcrossDiagonal = 60, + IOP_QuadReadAcrossX = 61, + IOP_QuadReadAcrossY = 62, + IOP_QuadReadLaneAt = 63, + IOP_RayFlags = 64, + IOP_RayTCurrent = 65, + IOP_RayTMin = 66, + IOP_ReportHit = 67, + IOP_SetMeshOutputCounts = 68, + IOP_TraceRay = 69, + IOP_WaveActiveAllEqual = 70, + IOP_WaveActiveAllTrue = 71, + IOP_WaveActiveAnyTrue = 72, + IOP_WaveActiveBallot = 73, + IOP_WaveActiveBitAnd = 74, + IOP_WaveActiveBitOr = 75, + IOP_WaveActiveBitXor = 76, + IOP_WaveActiveCountBits = 77, + IOP_WaveActiveMax = 78, + IOP_WaveActiveMin = 79, + IOP_WaveActiveProduct = 80, + IOP_WaveActiveSum = 81, + IOP_WaveGetLaneCount = 82, + IOP_WaveGetLaneIndex = 83, + IOP_WaveIsFirstLane = 84, + IOP_WaveMatch = 85, + IOP_WaveMultiPrefixBitAnd = 86, + IOP_WaveMultiPrefixBitOr = 87, + IOP_WaveMultiPrefixBitXor = 88, + IOP_WaveMultiPrefixCountBits = 89, + IOP_WaveMultiPrefixProduct = 90, + IOP_WaveMultiPrefixSum = 91, + IOP_WavePrefixCountBits = 92, + IOP_WavePrefixProduct = 93, + IOP_WavePrefixSum = 94, + IOP_WaveReadLaneAt = 95, + IOP_WaveReadLaneFirst = 96, + IOP_WorldRayDirection = 97, + IOP_WorldRayOrigin = 98, + IOP_WorldToObject = 99, + IOP_WorldToObject3x4 = 100, + IOP_WorldToObject4x3 = 101, + IOP_abort = 102, + IOP_abs = 103, + IOP_acos = 104, + IOP_all = 105, + IOP_and = 106, + IOP_any = 107, + IOP_asdouble = 108, + IOP_asfloat = 109, + IOP_asfloat16 = 110, + IOP_asin = 111, + IOP_asint = 112, + IOP_asint16 = 113, + IOP_asuint = 114, + IOP_asuint16 = 115, + IOP_atan = 116, + IOP_atan2 = 117, + IOP_ceil = 118, + IOP_clamp = 119, + IOP_clip = 120, + IOP_cos = 121, + IOP_cosh = 122, + IOP_countbits = 123, + IOP_cross = 124, + IOP_ddx = 125, + IOP_ddx_coarse = 126, + IOP_ddx_fine = 127, + IOP_ddy = 128, + IOP_ddy_coarse = 129, + IOP_ddy_fine = 130, + IOP_degrees = 131, + IOP_determinant = 132, + IOP_distance = 133, + IOP_dot = 134, + IOP_dot2add = 135, + 
IOP_dot4add_i8packed = 136, + IOP_dot4add_u8packed = 137, + IOP_dst = 138, + IOP_exp = 139, + IOP_exp2 = 140, + IOP_f16tof32 = 141, + IOP_f32tof16 = 142, + IOP_faceforward = 143, + IOP_firstbithigh = 144, + IOP_firstbitlow = 145, + IOP_floor = 146, + IOP_fma = 147, + IOP_fmod = 148, + IOP_frac = 149, + IOP_frexp = 150, + IOP_fwidth = 151, + IOP_isfinite = 152, + IOP_isinf = 153, + IOP_isnan = 154, + IOP_ldexp = 155, + IOP_length = 156, + IOP_lerp = 157, + IOP_lit = 158, + IOP_log = 159, + IOP_log10 = 160, + IOP_log2 = 161, + IOP_mad = 162, + IOP_max = 163, + IOP_min = 164, + IOP_modf = 165, + IOP_msad4 = 166, + IOP_mul = 167, + IOP_normalize = 168, + IOP_or = 169, + IOP_pack_clamp_s8 = 170, + IOP_pack_clamp_u8 = 171, + IOP_pack_s8 = 172, + IOP_pack_u8 = 173, + IOP_pow = 174, + IOP_printf = 175, + IOP_radians = 176, + IOP_rcp = 177, + IOP_reflect = 178, + IOP_refract = 179, + IOP_reversebits = 180, + IOP_round = 181, + IOP_rsqrt = 182, + IOP_saturate = 183, + IOP_select = 184, + IOP_sign = 185, + IOP_sin = 186, + IOP_sincos = 187, + IOP_sinh = 188, + IOP_smoothstep = 189, + IOP_source_mark = 190, + IOP_sqrt = 191, + IOP_step = 192, + IOP_tan = 193, + IOP_tanh = 194, + IOP_tex1D = 195, + IOP_tex1Dbias = 196, + IOP_tex1Dgrad = 197, + IOP_tex1Dlod = 198, + IOP_tex1Dproj = 199, + IOP_tex2D = 200, + IOP_tex2Dbias = 201, + IOP_tex2Dgrad = 202, + IOP_tex2Dlod = 203, + IOP_tex2Dproj = 204, + IOP_tex3D = 205, + IOP_tex3Dbias = 206, + IOP_tex3Dgrad = 207, + IOP_tex3Dlod = 208, + IOP_tex3Dproj = 209, + IOP_texCUBE = 210, + IOP_texCUBEbias = 211, + IOP_texCUBEgrad = 212, + IOP_texCUBElod = 213, + IOP_texCUBEproj = 214, + IOP_transpose = 215, + IOP_trunc = 216, + IOP_unpack_s8s16 = 217, + IOP_unpack_s8s32 = 218, + IOP_unpack_u8u16 = 219, + IOP_unpack_u8u32 = 220, + IOP_VkRawBufferLoad = 221, + IOP_VkRawBufferStore = 222, + IOP_VkReadClock = 223, + IOP_Vkext_execution_mode = 224, + IOP_Vkext_execution_mode_id = 225, + MOP_Append = 226, + MOP_RestartStrip = 227, + 
MOP_CalculateLevelOfDetail = 228, + MOP_CalculateLevelOfDetailUnclamped = 229, + MOP_GetDimensions = 230, + MOP_Load = 231, + MOP_Sample = 232, + MOP_SampleBias = 233, + MOP_SampleCmp = 234, + MOP_SampleCmpBias = 235, + MOP_SampleCmpGrad = 236, + MOP_SampleCmpLevel = 237, + MOP_SampleCmpLevelZero = 238, + MOP_SampleGrad = 239, + MOP_SampleLevel = 240, + MOP_Gather = 241, + MOP_GatherAlpha = 242, + MOP_GatherBlue = 243, + MOP_GatherCmp = 244, + MOP_GatherCmpAlpha = 245, + MOP_GatherCmpBlue = 246, + MOP_GatherCmpGreen = 247, + MOP_GatherCmpRed = 248, + MOP_GatherGreen = 249, + MOP_GatherRaw = 250, + MOP_GatherRed = 251, + MOP_GetSamplePosition = 252, + MOP_Load2 = 253, + MOP_Load3 = 254, + MOP_Load4 = 255, + MOP_InterlockedAdd = 256, + MOP_InterlockedAdd64 = 257, + MOP_InterlockedAnd = 258, + MOP_InterlockedAnd64 = 259, + MOP_InterlockedCompareExchange = 260, + MOP_InterlockedCompareExchange64 = 261, + MOP_InterlockedCompareExchangeFloatBitwise = 262, + MOP_InterlockedCompareStore = 263, + MOP_InterlockedCompareStore64 = 264, + MOP_InterlockedCompareStoreFloatBitwise = 265, + MOP_InterlockedExchange = 266, + MOP_InterlockedExchange64 = 267, + MOP_InterlockedExchangeFloat = 268, + MOP_InterlockedMax = 269, + MOP_InterlockedMax64 = 270, + MOP_InterlockedMin = 271, + MOP_InterlockedMin64 = 272, + MOP_InterlockedOr = 273, + MOP_InterlockedOr64 = 274, + MOP_InterlockedXor = 275, + MOP_InterlockedXor64 = 276, + MOP_Store = 277, + MOP_Store2 = 278, + MOP_Store3 = 279, + MOP_Store4 = 280, + MOP_DecrementCounter = 281, + MOP_IncrementCounter = 282, + MOP_Consume = 283, + MOP_WriteSamplerFeedback = 284, + MOP_WriteSamplerFeedbackBias = 285, + MOP_WriteSamplerFeedbackGrad = 286, + MOP_WriteSamplerFeedbackLevel = 287, + MOP_Abort = 288, + MOP_CandidateGeometryIndex = 289, + MOP_CandidateInstanceContributionToHitGroupIndex = 290, + MOP_CandidateInstanceID = 291, + MOP_CandidateInstanceIndex = 292, + MOP_CandidateObjectRayDirection = 293, + MOP_CandidateObjectRayOrigin = 294, + 
MOP_CandidateObjectToWorld3x4 = 295, + MOP_CandidateObjectToWorld4x3 = 296, + MOP_CandidatePrimitiveIndex = 297, + MOP_CandidateProceduralPrimitiveNonOpaque = 298, + MOP_CandidateTriangleBarycentrics = 299, + MOP_CandidateTriangleFrontFace = 300, + MOP_CandidateTriangleRayT = 301, + MOP_CandidateType = 302, + MOP_CandidateWorldToObject3x4 = 303, + MOP_CandidateWorldToObject4x3 = 304, + MOP_CommitNonOpaqueTriangleHit = 305, + MOP_CommitProceduralPrimitiveHit = 306, + MOP_CommittedGeometryIndex = 307, + MOP_CommittedInstanceContributionToHitGroupIndex = 308, + MOP_CommittedInstanceID = 309, + MOP_CommittedInstanceIndex = 310, + MOP_CommittedObjectRayDirection = 311, + MOP_CommittedObjectRayOrigin = 312, + MOP_CommittedObjectToWorld3x4 = 313, + MOP_CommittedObjectToWorld4x3 = 314, + MOP_CommittedPrimitiveIndex = 315, + MOP_CommittedRayT = 316, + MOP_CommittedStatus = 317, + MOP_CommittedTriangleBarycentrics = 318, + MOP_CommittedTriangleFrontFace = 319, + MOP_CommittedWorldToObject3x4 = 320, + MOP_CommittedWorldToObject4x3 = 321, + MOP_Proceed = 322, + MOP_RayFlags = 323, + MOP_RayTMin = 324, + MOP_TraceRayInline = 325, + MOP_WorldRayDirection = 326, + MOP_WorldRayOrigin = 327, + MOP_Count = 328, + MOP_FinishedCrossGroupSharing = 329, + MOP_GetGroupNodeOutputRecords = 330, + MOP_GetThreadNodeOutputRecords = 331, + MOP_IsValid = 332, + MOP_GroupIncrementOutputCount = 333, + MOP_ThreadIncrementOutputCount = 334, + MOP_OutputComplete = 335, + MOP_SubpassLoad = 336, // unsigned - IOP_InterlockedUMax, - IOP_InterlockedUMin, - IOP_WaveActiveUMax, - IOP_WaveActiveUMin, - IOP_WaveActiveUProduct, - IOP_WaveActiveUSum, - IOP_WaveMultiPrefixUProduct, - IOP_WaveMultiPrefixUSum, - IOP_WavePrefixUProduct, - IOP_WavePrefixUSum, - IOP_uabs, - IOP_uclamp, - IOP_udot, - IOP_ufirstbithigh, - IOP_umad, - IOP_umax, - IOP_umin, - IOP_umul, - IOP_usign, - MOP_InterlockedUMax, - MOP_InterlockedUMin, - Num_Intrinsics, + IOP_InterlockedUMax = 337, + IOP_InterlockedUMin = 338, + 
IOP_WaveActiveUMax = 339, + IOP_WaveActiveUMin = 340, + IOP_WaveActiveUProduct = 341, + IOP_WaveActiveUSum = 342, + IOP_WaveMultiPrefixUProduct = 343, + IOP_WaveMultiPrefixUSum = 344, + IOP_WavePrefixUProduct = 345, + IOP_WavePrefixUSum = 346, + IOP_uabs = 347, + IOP_uclamp = 348, + IOP_udot = 349, + IOP_ufirstbithigh = 350, + IOP_umad = 351, + IOP_umax = 352, + IOP_umin = 353, + IOP_umul = 354, + IOP_usign = 355, + MOP_InterlockedUMax = 356, + MOP_InterlockedUMin = 357, + Num_Intrinsics = 358, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index b3e552da18..02e72fb401 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -43,2981 +43,3040 @@ import hctdb_instrhelp const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { // OpCode OpCode name, OpCodeClass // OpCodeClass name, void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj, function attribute + // i16, i32, i64, udt, obj, vec, ext, function attribute, ext + // oload, vec oload // Temporary, indexable, input, output registers void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::TempRegLoad, - "TempRegLoad", - OCC::TempRegLoad, - "tempRegLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TempRegStore, - "TempRegStore", - OCC::TempRegStore, - "tempRegStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::MinPrecXRegLoad, - "MinPrecXRegLoad", - OCC::MinPrecXRegLoad, - "minPrecXRegLoad", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::MinPrecXRegStore, - "MinPrecXRegStore", - OCC::MinPrecXRegStore, - "minPrecXRegStore", - {false, true, false, false, false, false, true, false, false, false, - false}, - Attribute::None, - }, - { - 
OC::LoadInput, - "LoadInput", - OCC::LoadInput, - "loadInput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StoreOutput, - "StoreOutput", - OCC::StoreOutput, - "storeOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, + // i1, i8, i16, i32, i64, udt, obj, vec, function attribute, + // ext oload, vec oload + {OC::TempRegLoad, + "TempRegLoad", + OCC::TempRegLoad, + "tempRegLoad", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::TempRegStore, + "TempRegStore", + OCC::TempRegStore, + "tempRegStore", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::MinPrecXRegLoad, + "MinPrecXRegLoad", + OCC::MinPrecXRegLoad, + "minPrecXRegLoad", + {false, true, false, false, false, false, true, false, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::MinPrecXRegStore, + "MinPrecXRegStore", + OCC::MinPrecXRegStore, + "minPrecXRegStore", + {false, true, false, false, false, false, true, false, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::LoadInput, + "LoadInput", + OCC::LoadInput, + "loadInput", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::StoreOutput, + "StoreOutput", + OCC::StoreOutput, + "storeOutput", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Unary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FAbs, - "FAbs", - OCC::Unary, - "unary", - {false, true, true, true, false, 
false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Saturate, - "Saturate", - OCC::Unary, - "unary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNaN, - "IsNaN", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsInf, - "IsInf", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsFinite, - "IsFinite", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IsNormal, - "IsNormal", - OCC::IsSpecialFloat, - "isSpecialFloat", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Cos, - "Cos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sin, - "Sin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Tan, - "Tan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Acos, - "Acos", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Asin, - "Asin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Atan, - "Atan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hcos, - "Hcos", - OCC::Unary, - "unary", - {false, true, true, 
false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Hsin, - "Hsin", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Htan, - "Htan", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Exp, - "Exp", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Frc, - "Frc", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Log, - "Log", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Sqrt, - "Sqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Rsqrt, - "Rsqrt", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::FAbs, + "FAbs", + OCC::Unary, + "unary", + {false, true, true, true, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Saturate, + "Saturate", + OCC::Unary, + "unary", + {false, true, true, true, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::IsNaN, + "IsNaN", + OCC::IsSpecialFloat, + "isSpecialFloat", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::IsInf, + "IsInf", + OCC::IsSpecialFloat, + "isSpecialFloat", + {false, true, true, false, 
false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::IsFinite, + "IsFinite", + OCC::IsSpecialFloat, + "isSpecialFloat", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::IsNormal, + "IsNormal", + OCC::IsSpecialFloat, + "isSpecialFloat", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Cos, + "Cos", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Sin, + "Sin", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Tan, + "Tan", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Acos, + "Acos", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Asin, + "Asin", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Atan, + "Atan", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Hcos, + "Hcos", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + 
{OC::Hsin, + "Hsin", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Htan, + "Htan", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Exp, + "Exp", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Frc, + "Frc", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Log, + "Log", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Sqrt, + "Sqrt", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Rsqrt, + "Rsqrt", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, // Unary float - rounding void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Round_ne, - "Round_ne", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_ni, - "Round_ni", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Round_pi, - "Round_pi", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - 
Attribute::ReadNone, - }, - { - OC::Round_z, - "Round_z", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::Round_ne, + "Round_ne", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Round_ni, + "Round_ni", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Round_pi, + "Round_pi", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, + {OC::Round_z, + "Round_z", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x6}, {0x0}}}, // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfrev, - "Bfrev", - OCC::Unary, - "unary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Countbits, - "Countbits", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::FirstbitLo, - "FirstbitLo", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::Bfrev, + "Bfrev", + OCC::Unary, + "unary", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Countbits, + 
"Countbits", + OCC::UnaryBits, + "unaryBits", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::FirstbitLo, + "FirstbitLo", + OCC::UnaryBits, + "unaryBits", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Unary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitHi, - "FirstbitHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::FirstbitHi, + "FirstbitHi", + OCC::UnaryBits, + "unaryBits", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Unary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FirstbitSHi, - "FirstbitSHi", - OCC::UnaryBits, - "unaryBits", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::FirstbitSHi, + "FirstbitSHi", + OCC::UnaryBits, + "unaryBits", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Binary float void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::FMax, - "FMax", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FMin, - "FMin", - OCC::Binary, - "binary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + 
{OC::FMax, + "FMax", + OCC::Binary, + "binary", + {false, true, true, true, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0xe}, {0x0}}}, + {OC::FMin, + "FMin", + OCC::Binary, + "binary", + {false, true, true, true, false, false, false, false, false, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0xe}, {0x0}}}, // Binary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMax, - "IMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::IMin, - "IMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::IMax, + "IMax", + OCC::Binary, + "binary", + {false, false, false, false, false, false, true, true, true, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x1c0}, {0x0}}}, + {OC::IMin, + "IMin", + OCC::Binary, + "binary", + {false, false, false, false, false, false, true, true, true, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x1c0}, {0x0}}}, // Binary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMax, - "UMax", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::UMin, - "UMin", - OCC::Binary, - "binary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::UMax, + "UMax", + OCC::Binary, + "binary", + {false, false, false, false, false, false, true, true, true, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x1c0}, {0x0}}}, + {OC::UMin, + "UMin", + OCC::Binary, + 
"binary", + {false, false, false, false, false, false, true, true, true, false, false, + true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x1c0}, {0x0}}}, // Binary int with two outputs void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IMul, - "IMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::IMul, + "IMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Binary uint with two outputs void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UMul, - "UMul", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::UDiv, - "UDiv", - OCC::BinaryWithTwoOuts, - "binaryWithTwoOuts", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::UMul, + "UMul", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::UDiv, + "UDiv", + OCC::BinaryWithTwoOuts, + "binaryWithTwoOuts", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Binary uint with carry or borrow void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::UAddc, - "UAddc", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, 
false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::USubb, - "USubb", - OCC::BinaryWithCarryOrBorrow, - "binaryWithCarryOrBorrow", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::UAddc, + "UAddc", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::USubb, + "USubb", + OCC::BinaryWithCarryOrBorrow, + "binaryWithCarryOrBorrow", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Tertiary float void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::FMad, - "FMad", - OCC::Tertiary, - "tertiary", - {false, true, true, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Fma, - "Fma", - OCC::Tertiary, - "tertiary", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::FMad, + "FMad", + OCC::Tertiary, + "tertiary", + {false, true, true, true, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Fma, + "Fma", + OCC::Tertiary, + "tertiary", + {false, false, false, true, false, false, false, false, false, false, + false, true, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x8}, {0x0}}}, // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IMad, - "IMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, 
+ // udt, obj, vec, function attribute, ext oload, vec oload + {OC::IMad, + "IMad", + OCC::Tertiary, + "tertiary", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::UMad, - "UMad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::UMad, + "UMad", + OCC::Tertiary, + "tertiary", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Tertiary int void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Msad, - "Msad", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, - { - OC::Ibfe, - "Ibfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::Msad, + "Msad", + OCC::Tertiary, + "tertiary", + {false, false, false, false, false, false, false, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Ibfe, + "Ibfe", + OCC::Tertiary, + "tertiary", + {false, false, false, false, false, false, false, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Tertiary uint void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Ubfe, - "Ubfe", - OCC::Tertiary, - "tertiary", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, 
vec oload + {OC::Ubfe, + "Ubfe", + OCC::Tertiary, + "tertiary", + {false, false, false, false, false, false, false, true, true, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Quaternary void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Bfi, - "Bfi", - OCC::Quaternary, - "quaternary", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::Bfi, + "Bfi", + OCC::Quaternary, + "quaternary", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Dot void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::Dot2, - "Dot2", - OCC::Dot2, - "dot2", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot3, - "Dot3", - OCC::Dot3, - "dot3", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4, - "Dot4", - OCC::Dot4, - "dot4", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // obj, vec, function attribute, ext oload, vec oload + {OC::Dot2, + "Dot2", + OCC::Dot2, + "dot2", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Dot3, + "Dot3", + OCC::Dot3, + "dot3", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Dot4, + "Dot4", + OCC::Dot4, + "dot4", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources void, h, f, d, i1, i8, 
i16, i32, i64, - // udt, obj , function attribute - { - OC::CreateHandle, - "CreateHandle", - OCC::CreateHandle, - "createHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoad, - "CBufferLoad", - OCC::CBufferLoad, - "cbufferLoad", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::CBufferLoadLegacy, - "CBufferLoadLegacy", - OCC::CBufferLoadLegacy, - "cbufferLoadLegacy", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::ReadOnly, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::CreateHandle, + "CreateHandle", + OCC::CreateHandle, + "createHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CBufferLoad, + "CBufferLoad", + OCC::CBufferLoad, + "cbufferLoad", + {false, true, true, true, false, true, true, true, true, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CBufferLoadLegacy, + "CBufferLoadLegacy", + OCC::CBufferLoadLegacy, + "cbufferLoadLegacy", + {false, true, true, true, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Sample, - "Sample", - OCC::Sample, - "sample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleBias, - "SampleBias", - OCC::SampleBias, - "sampleBias", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleLevel, - "SampleLevel", - OCC::SampleLevel, - "sampleLevel", - {false, true, true, false, false, false, true, true, false, false, - false}, - 
Attribute::ReadOnly, - }, - { - OC::SampleGrad, - "SampleGrad", - OCC::SampleGrad, - "sampleGrad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmp, - "SampleCmp", - OCC::SampleCmp, - "sampleCmp", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpLevelZero, - "SampleCmpLevelZero", - OCC::SampleCmpLevelZero, - "sampleCmpLevelZero", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::Sample, + "Sample", + OCC::Sample, + "sample", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SampleBias, + "SampleBias", + OCC::SampleBias, + "sampleBias", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SampleLevel, + "SampleLevel", + OCC::SampleLevel, + "sampleLevel", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SampleGrad, + "SampleGrad", + OCC::SampleGrad, + "sampleGrad", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SampleCmp, + "SampleCmp", + OCC::SampleCmp, + "sampleCmp", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SampleCmpLevelZero, + "SampleCmpLevelZero", + OCC::SampleCmpLevelZero, + "sampleCmpLevelZero", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, 
{0x0}}, + {{0x0}, {0x0}}}, // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureLoad, - "TextureLoad", - OCC::TextureLoad, - "textureLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureStore, - "TextureStore", - OCC::TextureStore, - "textureStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferLoad, - "BufferLoad", - OCC::BufferLoad, - "bufferLoad", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::BufferStore, - "BufferStore", - OCC::BufferStore, - "bufferStore", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::BufferUpdateCounter, - "BufferUpdateCounter", - OCC::BufferUpdateCounter, - "bufferUpdateCounter", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CheckAccessFullyMapped, - "CheckAccessFullyMapped", - OCC::CheckAccessFullyMapped, - "checkAccessFullyMapped", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetDimensions, - "GetDimensions", - OCC::GetDimensions, - "getDimensions", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::TextureLoad, + "TextureLoad", + OCC::TextureLoad, + "textureLoad", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::TextureStore, + "TextureStore", + OCC::TextureStore, + "textureStore", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + 
{OC::BufferLoad, + "BufferLoad", + OCC::BufferLoad, + "bufferLoad", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BufferStore, + "BufferStore", + OCC::BufferStore, + "bufferStore", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BufferUpdateCounter, + "BufferUpdateCounter", + OCC::BufferUpdateCounter, + "bufferUpdateCounter", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CheckAccessFullyMapped, + "CheckAccessFullyMapped", + OCC::CheckAccessFullyMapped, + "checkAccessFullyMapped", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::GetDimensions, + "GetDimensions", + OCC::GetDimensions, + "getDimensions", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGather, - "TextureGather", - OCC::TextureGather, - "textureGather", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::TextureGatherCmp, - "TextureGatherCmp", - OCC::TextureGatherCmp, - "textureGatherCmp", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::TextureGather, + "TextureGather", + OCC::TextureGather, + "textureGather", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, 
{0x0}}}, + {OC::TextureGatherCmp, + "TextureGatherCmp", + OCC::TextureGatherCmp, + "textureGatherCmp", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Texture2DMSGetSamplePosition, - "Texture2DMSGetSamplePosition", - OCC::Texture2DMSGetSamplePosition, - "texture2DMSGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSamplePosition, - "RenderTargetGetSamplePosition", - OCC::RenderTargetGetSamplePosition, - "renderTargetGetSamplePosition", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RenderTargetGetSampleCount, - "RenderTargetGetSampleCount", - OCC::RenderTargetGetSampleCount, - "renderTargetGetSampleCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::Texture2DMSGetSamplePosition, + "Texture2DMSGetSamplePosition", + OCC::Texture2DMSGetSamplePosition, + "texture2DMSGetSamplePosition", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RenderTargetGetSamplePosition, + "RenderTargetGetSamplePosition", + OCC::RenderTargetGetSamplePosition, + "renderTargetGetSamplePosition", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RenderTargetGetSampleCount, + "RenderTargetGetSampleCount", + OCC::RenderTargetGetSampleCount, + "renderTargetGetSampleCount", + {true, false, false, false, false, false, false, false, false, false, + false, false, 
false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AtomicBinOp, - "AtomicBinOp", - OCC::AtomicBinOp, - "atomicBinOp", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::AtomicCompareExchange, - "AtomicCompareExchange", - OCC::AtomicCompareExchange, - "atomicCompareExchange", - {false, false, false, false, false, false, false, true, true, false, - false}, - Attribute::None, - }, - { - OC::Barrier, - "Barrier", - OCC::Barrier, - "barrier", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::AtomicBinOp, + "AtomicBinOp", + OCC::AtomicBinOp, + "atomicBinOp", + {false, false, false, false, false, false, false, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::AtomicCompareExchange, + "AtomicCompareExchange", + OCC::AtomicCompareExchange, + "atomicCompareExchange", + {false, false, false, false, false, false, false, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Barrier, + "Barrier", + OCC::Barrier, + "barrier", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::NoDuplicate, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::CalculateLOD, - "CalculateLOD", - OCC::CalculateLOD, - "calculateLOD", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::CalculateLOD, + "CalculateLOD", + OCC::CalculateLOD, + "calculateLOD", + {false, false, true, false, false, false, false, false, false, 
false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::Discard, - "Discard", - OCC::Discard, - "discard", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::Discard, + "Discard", + OCC::Discard, + "discard", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Derivatives void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DerivCoarseX, - "DerivCoarseX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivCoarseY, - "DerivCoarseY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineX, - "DerivFineX", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DerivFineY, - "DerivFineY", - OCC::Unary, - "unary", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::DerivCoarseX, + "DerivCoarseX", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::DerivCoarseY, + "DerivCoarseY", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::DerivFineX, + "DerivFineX", + OCC::Unary, + "unary", + {false, true, true, false, false, 
false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::DerivFineY, + "DerivFineY", + OCC::Unary, + "unary", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::EvalSnapped, - "EvalSnapped", - OCC::EvalSnapped, - "evalSnapped", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalSampleIndex, - "EvalSampleIndex", - OCC::EvalSampleIndex, - "evalSampleIndex", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::EvalCentroid, - "EvalCentroid", - OCC::EvalCentroid, - "evalCentroid", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SampleIndex, - "SampleIndex", - OCC::SampleIndex, - "sampleIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Coverage, - "Coverage", - OCC::Coverage, - "coverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InnerCoverage, - "InnerCoverage", - OCC::InnerCoverage, - "innerCoverage", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::EvalSnapped, + "EvalSnapped", + OCC::EvalSnapped, + "evalSnapped", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::EvalSampleIndex, + "EvalSampleIndex", + OCC::EvalSampleIndex, + "evalSampleIndex", + {false, true, true, false, false, false, false, false, 
false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::EvalCentroid, + "EvalCentroid", + OCC::EvalCentroid, + "evalCentroid", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SampleIndex, + "SampleIndex", + OCC::SampleIndex, + "sampleIndex", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Coverage, + "Coverage", + OCC::Coverage, + "coverage", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::InnerCoverage, + "InnerCoverage", + OCC::InnerCoverage, + "innerCoverage", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Compute/Mesh/Amplification/Node shader void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::ThreadId, - "ThreadId", - OCC::ThreadId, - "threadId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::GroupId, - "GroupId", - OCC::GroupId, - "groupId", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ThreadIdInGroup, - "ThreadIdInGroup", - OCC::ThreadIdInGroup, - "threadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::FlattenedThreadIdInGroup, - "FlattenedThreadIdInGroup", - OCC::FlattenedThreadIdInGroup, - "flattenedThreadIdInGroup", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i8, i16, i32, i64, udt, obj, vec, function attribute, ext + // oload, vec 
oload + {OC::ThreadId, + "ThreadId", + OCC::ThreadId, + "threadId", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::GroupId, + "GroupId", + OCC::GroupId, + "groupId", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ThreadIdInGroup, + "ThreadIdInGroup", + OCC::ThreadIdInGroup, + "threadIdInGroup", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::FlattenedThreadIdInGroup, + "FlattenedThreadIdInGroup", + OCC::FlattenedThreadIdInGroup, + "flattenedThreadIdInGroup", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Geometry shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::EmitStream, - "EmitStream", - OCC::EmitStream, - "emitStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::CutStream, - "CutStream", - OCC::CutStream, - "cutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitThenCutStream, - "EmitThenCutStream", - OCC::EmitThenCutStream, - "emitThenCutStream", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GSInstanceID, - "GSInstanceID", - OCC::GSInstanceID, - "gsInstanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::EmitStream, + "EmitStream", + OCC::EmitStream, + "emitStream", + {true, false, false, false, false, false, false, 
false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CutStream, + "CutStream", + OCC::CutStream, + "cutStream", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::EmitThenCutStream, + "EmitThenCutStream", + OCC::EmitThenCutStream, + "emitThenCutStream", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::GSInstanceID, + "GSInstanceID", + OCC::GSInstanceID, + "gsInstanceID", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::MakeDouble, - "MakeDouble", - OCC::MakeDouble, - "makeDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::SplitDouble, - "SplitDouble", - OCC::SplitDouble, - "splitDouble", - {false, false, false, true, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::MakeDouble, + "MakeDouble", + OCC::MakeDouble, + "makeDouble", + {false, false, false, true, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SplitDouble, + "SplitDouble", + OCC::SplitDouble, + "splitDouble", + {false, false, false, true, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Domain and hull shader void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LoadOutputControlPoint, - "LoadOutputControlPoint", - OCC::LoadOutputControlPoint, - 
"loadOutputControlPoint", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LoadPatchConstant, - "LoadPatchConstant", - OCC::LoadPatchConstant, - "loadPatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::LoadOutputControlPoint, + "LoadOutputControlPoint", + OCC::LoadOutputControlPoint, + "loadOutputControlPoint", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::LoadPatchConstant, + "LoadPatchConstant", + OCC::LoadPatchConstant, + "loadPatchConstant", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Domain shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::DomainLocation, - "DomainLocation", - OCC::DomainLocation, - "domainLocation", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::DomainLocation, + "DomainLocation", + OCC::DomainLocation, + "domainLocation", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Hull shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::StorePatchConstant, - "StorePatchConstant", - OCC::StorePatchConstant, - "storePatchConstant", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::OutputControlPointID, - "OutputControlPointID", - OCC::OutputControlPointID, - "outputControlPointID", - {false, false, false, false, false, false, false, true, 
false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::StorePatchConstant, + "StorePatchConstant", + OCC::StorePatchConstant, + "storePatchConstant", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::OutputControlPointID, + "OutputControlPointID", + OCC::OutputControlPointID, + "outputControlPointID", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Hull, Domain and Geometry shaders void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveID, - "PrimitiveID", - OCC::PrimitiveID, - "primitiveID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::PrimitiveID, + "PrimitiveID", + OCC::PrimitiveID, + "primitiveID", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Other void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::CycleCounterLegacy, - "CycleCounterLegacy", - OCC::CycleCounterLegacy, - "cycleCounterLegacy", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // obj, vec, function attribute, ext oload, vec oload + {OC::CycleCounterLegacy, + "CycleCounterLegacy", + OCC::CycleCounterLegacy, + "cycleCounterLegacy", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveIsFirstLane, - "WaveIsFirstLane", - OCC::WaveIsFirstLane, - 
"waveIsFirstLane", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveGetLaneIndex, - "WaveGetLaneIndex", - OCC::WaveGetLaneIndex, - "waveGetLaneIndex", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::WaveGetLaneCount, - "WaveGetLaneCount", - OCC::WaveGetLaneCount, - "waveGetLaneCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WaveAnyTrue, - "WaveAnyTrue", - OCC::WaveAnyTrue, - "waveAnyTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveAllTrue, - "WaveAllTrue", - OCC::WaveAllTrue, - "waveAllTrue", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveActiveAllEqual, - "WaveActiveAllEqual", - OCC::WaveActiveAllEqual, - "waveActiveAllEqual", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBallot, - "WaveActiveBallot", - OCC::WaveActiveBallot, - "waveActiveBallot", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WaveReadLaneAt, - "WaveReadLaneAt", - OCC::WaveReadLaneAt, - "waveReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveReadLaneFirst, - "WaveReadLaneFirst", - OCC::WaveReadLaneFirst, - "waveReadLaneFirst", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveOp, - "WaveActiveOp", - OCC::WaveActiveOp, - "waveActiveOp", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveActiveBit, - "WaveActiveBit", - OCC::WaveActiveBit, - "waveActiveBit", - {false, false, false, false, false, 
true, true, true, true, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixOp, - "WavePrefixOp", - OCC::WavePrefixOp, - "wavePrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, + // obj, vec, function attribute, ext oload, vec oload + {OC::WaveIsFirstLane, + "WaveIsFirstLane", + OCC::WaveIsFirstLane, + "waveIsFirstLane", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveGetLaneIndex, + "WaveGetLaneIndex", + OCC::WaveGetLaneIndex, + "waveGetLaneIndex", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveGetLaneCount, + "WaveGetLaneCount", + OCC::WaveGetLaneCount, + "waveGetLaneCount", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveAnyTrue, + "WaveAnyTrue", + OCC::WaveAnyTrue, + "waveAnyTrue", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveAllTrue, + "WaveAllTrue", + OCC::WaveAllTrue, + "waveAllTrue", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveActiveAllEqual, + "WaveActiveAllEqual", + OCC::WaveActiveAllEqual, + "waveActiveAllEqual", + {false, true, true, true, true, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveActiveBallot, + "WaveActiveBallot", + OCC::WaveActiveBallot, + "waveActiveBallot", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + 
{OC::WaveReadLaneAt, + "WaveReadLaneAt", + OCC::WaveReadLaneAt, + "waveReadLaneAt", + {false, true, true, true, true, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveReadLaneFirst, + "WaveReadLaneFirst", + OCC::WaveReadLaneFirst, + "waveReadLaneFirst", + {false, true, true, true, true, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveActiveOp, + "WaveActiveOp", + OCC::WaveActiveOp, + "waveActiveOp", + {false, true, true, true, true, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveActiveBit, + "WaveActiveBit", + OCC::WaveActiveBit, + "waveActiveBit", + {false, false, false, false, false, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WavePrefixOp, + "WavePrefixOp", + OCC::WavePrefixOp, + "wavePrefixOp", + {false, true, true, true, false, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadReadLaneAt, - "QuadReadLaneAt", - OCC::QuadReadLaneAt, - "quadReadLaneAt", - {false, true, true, true, true, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::QuadOp, - "QuadOp", - OCC::QuadOp, - "quadOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::QuadReadLaneAt, + "QuadReadLaneAt", + OCC::QuadReadLaneAt, + "quadReadLaneAt", + {false, true, true, true, true, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::QuadOp, + "QuadOp", + OCC::QuadOp, + "quadOp", + {false, true, true, true, false, true, true, true, true, false, false, 
+ false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Bitcasts with different sizes void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::BitcastI16toF16, - "BitcastI16toF16", - OCC::BitcastI16toF16, - "bitcastI16toF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF16toI16, - "BitcastF16toI16", - OCC::BitcastF16toI16, - "bitcastF16toI16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI32toF32, - "BitcastI32toF32", - OCC::BitcastI32toF32, - "bitcastI32toF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF32toI32, - "BitcastF32toI32", - OCC::BitcastF32toI32, - "bitcastF32toI32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastI64toF64, - "BitcastI64toF64", - OCC::BitcastI64toF64, - "bitcastI64toF64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::BitcastF64toI64, - "BitcastF64toI64", - OCC::BitcastF64toI64, - "bitcastF64toI64", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::BitcastI16toF16, + "BitcastI16toF16", + OCC::BitcastI16toF16, + "bitcastI16toF16", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BitcastF16toI16, + "BitcastF16toI16", + OCC::BitcastF16toI16, + "bitcastF16toI16", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BitcastI32toF32, + 
"BitcastI32toF32", + OCC::BitcastI32toF32, + "bitcastI32toF32", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BitcastF32toI32, + "BitcastF32toI32", + OCC::BitcastF32toI32, + "bitcastF32toI32", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BitcastI64toF64, + "BitcastI64toF64", + OCC::BitcastI64toF64, + "bitcastI64toF64", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BitcastF64toI64, + "BitcastF64toI64", + OCC::BitcastF64toI64, + "bitcastF64toI64", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Legacy floating-point void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::LegacyF32ToF16, - "LegacyF32ToF16", - OCC::LegacyF32ToF16, - "legacyF32ToF16", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyF16ToF32, - "LegacyF16ToF32", - OCC::LegacyF16ToF32, - "legacyF16ToF32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::LegacyF32ToF16, + "LegacyF32ToF16", + OCC::LegacyF32ToF16, + "legacyF32ToF16", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::LegacyF16ToF32, + "LegacyF16ToF32", + OCC::LegacyF16ToF32, + "legacyF16ToF32", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + 
{{0x0}, {0x0}}}, // Double precision void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::LegacyDoubleToFloat, - "LegacyDoubleToFloat", - OCC::LegacyDoubleToFloat, - "legacyDoubleToFloat", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToSInt32, - "LegacyDoubleToSInt32", - OCC::LegacyDoubleToSInt32, - "legacyDoubleToSInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::LegacyDoubleToUInt32, - "LegacyDoubleToUInt32", - OCC::LegacyDoubleToUInt32, - "legacyDoubleToUInt32", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::LegacyDoubleToFloat, + "LegacyDoubleToFloat", + OCC::LegacyDoubleToFloat, + "legacyDoubleToFloat", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::LegacyDoubleToSInt32, + "LegacyDoubleToSInt32", + OCC::LegacyDoubleToSInt32, + "legacyDoubleToSInt32", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::LegacyDoubleToUInt32, + "LegacyDoubleToUInt32", + OCC::LegacyDoubleToUInt32, + "legacyDoubleToUInt32", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveAllBitCount, - "WaveAllBitCount", - OCC::WaveAllOp, - "waveAllOp", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WavePrefixBitCount, - "WavePrefixBitCount", - OCC::WavePrefixOp, - "wavePrefixOp", - 
{true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // obj, vec, function attribute, ext oload, vec oload + {OC::WaveAllBitCount, + "WaveAllBitCount", + OCC::WaveAllOp, + "waveAllOp", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WavePrefixBitCount, + "WavePrefixBitCount", + OCC::WavePrefixOp, + "wavePrefixOp", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Pixel shader void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::AttributeAtVertex, - "AttributeAtVertex", - OCC::AttributeAtVertex, - "attributeAtVertex", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::AttributeAtVertex, + "AttributeAtVertex", + OCC::AttributeAtVertex, + "attributeAtVertex", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Graphics shader void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ViewID, - "ViewID", - OCC::ViewID, - "viewID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::ViewID, + "ViewID", + OCC::ViewID, + "viewID", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::RawBufferLoad, - "RawBufferLoad", - OCC::RawBufferLoad, - "rawBufferLoad", - {false, true, true, true, false, false, true, 
true, true, false, false}, - Attribute::ReadOnly, - }, - { - OC::RawBufferStore, - "RawBufferStore", - OCC::RawBufferStore, - "rawBufferStore", - {false, true, true, true, false, false, true, true, true, false, false}, - Attribute::None, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::RawBufferLoad, + "RawBufferLoad", + OCC::RawBufferLoad, + "rawBufferLoad", + {false, true, true, true, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RawBufferStore, + "RawBufferStore", + OCC::RawBufferStore, + "rawBufferStore", + {false, true, true, true, false, false, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::InstanceID, - "InstanceID", - OCC::InstanceID, - "instanceID", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::InstanceIndex, - "InstanceIndex", - OCC::InstanceIndex, - "instanceIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i8, i16, i32, i64, udt, obj, vec, function attribute, ext + // oload, vec oload + {OC::InstanceID, + "InstanceID", + OCC::InstanceID, + "instanceID", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::InstanceIndex, + "InstanceIndex", + OCC::InstanceIndex, + "instanceIndex", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Raytracing hit uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::HitKind, - "HitKind", - OCC::HitKind, - "hitKind", - {false, 
false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::HitKind, + "HitKind", + OCC::HitKind, + "hitKind", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Raytracing uint System Values void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::RayFlags, - "RayFlags", - OCC::RayFlags, - "rayFlags", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::RayFlags, + "RayFlags", + OCC::RayFlags, + "rayFlags", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Ray Dispatch Arguments void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::DispatchRaysIndex, - "DispatchRaysIndex", - OCC::DispatchRaysIndex, - "dispatchRaysIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::DispatchRaysDimensions, - "DispatchRaysDimensions", - OCC::DispatchRaysDimensions, - "dispatchRaysDimensions", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::DispatchRaysIndex, + "DispatchRaysIndex", + OCC::DispatchRaysIndex, + "dispatchRaysIndex", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::DispatchRaysDimensions, + "DispatchRaysDimensions", + OCC::DispatchRaysDimensions, + "dispatchRaysDimensions", + {false, false, false, false, 
false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Ray Vectors void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::WorldRayOrigin, - "WorldRayOrigin", - OCC::WorldRayOrigin, - "worldRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldRayDirection, - "WorldRayDirection", - OCC::WorldRayDirection, - "worldRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::WorldRayOrigin, + "WorldRayOrigin", + OCC::WorldRayOrigin, + "worldRayOrigin", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WorldRayDirection, + "WorldRayDirection", + OCC::WorldRayDirection, + "worldRayDirection", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Ray object space Vectors void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::ObjectRayOrigin, - "ObjectRayOrigin", - OCC::ObjectRayOrigin, - "objectRayOrigin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::ObjectRayDirection, - "ObjectRayDirection", - OCC::ObjectRayDirection, - "objectRayDirection", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::ObjectRayOrigin, + "ObjectRayOrigin", + OCC::ObjectRayOrigin, + "objectRayOrigin", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, 
{0x0}}, + {{0x0}, {0x0}}}, + {OC::ObjectRayDirection, + "ObjectRayDirection", + OCC::ObjectRayDirection, + "objectRayDirection", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Ray Transforms void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::ObjectToWorld, - "ObjectToWorld", - OCC::ObjectToWorld, - "objectToWorld", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::WorldToObject, - "WorldToObject", - OCC::WorldToObject, - "worldToObject", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::ObjectToWorld, + "ObjectToWorld", + OCC::ObjectToWorld, + "objectToWorld", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WorldToObject, + "WorldToObject", + OCC::WorldToObject, + "worldToObject", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // RayT void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::RayTMin, - "RayTMin", - OCC::RayTMin, - "rayTMin", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::RayTCurrent, - "RayTCurrent", - OCC::RayTCurrent, - "rayTCurrent", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // obj, vec, function attribute, ext oload, vec oload + {OC::RayTMin, + "RayTMin", + OCC::RayTMin, + "rayTMin", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + 
{{0x0}, {0x0}}}, + {OC::RayTCurrent, + "RayTCurrent", + OCC::RayTCurrent, + "rayTCurrent", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // AnyHit Terminals void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::IgnoreHit, - "IgnoreHit", - OCC::IgnoreHit, - "ignoreHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, - { - OC::AcceptHitAndEndSearch, - "AcceptHitAndEndSearch", - OCC::AcceptHitAndEndSearch, - "acceptHitAndEndSearch", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoReturn, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::IgnoreHit, + "IgnoreHit", + OCC::IgnoreHit, + "ignoreHit", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::NoReturn, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::AcceptHitAndEndSearch, + "AcceptHitAndEndSearch", + OCC::AcceptHitAndEndSearch, + "acceptHitAndEndSearch", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::NoReturn, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Indirect Shader Invocation void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::TraceRay, - "TraceRay", - OCC::TraceRay, - "traceRay", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::ReportHit, - "ReportHit", - OCC::ReportHit, - "reportHit", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, - { - OC::CallShader, - "CallShader", - OCC::CallShader, - "callShader", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + 
// oload + {OC::TraceRay, + "TraceRay", + OCC::TraceRay, + "traceRay", + {false, false, false, false, false, false, false, false, false, true, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReportHit, + "ReportHit", + OCC::ReportHit, + "reportHit", + {false, false, false, false, false, false, false, false, false, true, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CallShader, + "CallShader", + OCC::CallShader, + "callShader", + {false, false, false, false, false, false, false, false, false, true, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Library create handle from resource struct (like HL intrinsic) void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::CreateHandleForLib, - "CreateHandleForLib", - OCC::CreateHandleForLib, - "createHandleForLib", - {false, false, false, false, false, false, false, false, false, false, - true}, - Attribute::ReadOnly, - }, + // f, d, i1, i8, i16, i32, i64, udt, obj, vec, + // function attribute, ext oload, vec oload + {OC::CreateHandleForLib, + "CreateHandleForLib", + OCC::CreateHandleForLib, + "createHandleForLib", + {false, false, false, false, false, false, false, false, false, false, + true, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Raytracing object space uint System Values void, h, f, d, i1, - // i8, i16, i32, i64, udt, obj , function attribute - { - OC::PrimitiveIndex, - "PrimitiveIndex", - OCC::PrimitiveIndex, - "primitiveIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i8, i16, i32, i64, udt, obj, vec, function attribute, ext + // oload, vec oload + {OC::PrimitiveIndex, + "PrimitiveIndex", + OCC::PrimitiveIndex, + "primitiveIndex", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, 
{0x0}}, + {{0x0}, {0x0}}}, // Dot product with accumulate void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::Dot2AddHalf, - "Dot2AddHalf", - OCC::Dot2AddHalf, - "dot2AddHalf", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddI8Packed, - "Dot4AddI8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::Dot4AddU8Packed, - "Dot4AddU8Packed", - OCC::Dot4AddPacked, - "dot4AddPacked", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::Dot2AddHalf, + "Dot2AddHalf", + OCC::Dot2AddHalf, + "dot2AddHalf", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Dot4AddI8Packed, + "Dot4AddI8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Dot4AddU8Packed, + "Dot4AddU8Packed", + OCC::Dot4AddPacked, + "dot4AddPacked", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Wave void, h, f, d, i1, i8, i16, i32, i64, udt, - // obj , function attribute - { - OC::WaveMatch, - "WaveMatch", - OCC::WaveMatch, - "waveMatch", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixOp, - "WaveMultiPrefixOp", - OCC::WaveMultiPrefixOp, - "waveMultiPrefixOp", - {false, true, true, true, false, true, true, true, true, false, false}, - Attribute::None, - }, - { - OC::WaveMultiPrefixBitCount, - 
"WaveMultiPrefixBitCount", - OCC::WaveMultiPrefixBitCount, - "waveMultiPrefixBitCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // obj, vec, function attribute, ext oload, vec oload + {OC::WaveMatch, + "WaveMatch", + OCC::WaveMatch, + "waveMatch", + {false, true, true, true, false, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveMultiPrefixOp, + "WaveMultiPrefixOp", + OCC::WaveMultiPrefixOp, + "waveMultiPrefixOp", + {false, true, true, true, false, true, true, true, true, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WaveMultiPrefixBitCount, + "WaveMultiPrefixBitCount", + OCC::WaveMultiPrefixBitCount, + "waveMultiPrefixBitCount", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Mesh shader instructions void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::SetMeshOutputCounts, - "SetMeshOutputCounts", - OCC::SetMeshOutputCounts, - "setMeshOutputCounts", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::EmitIndices, - "EmitIndices", - OCC::EmitIndices, - "emitIndices", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetMeshPayload, - "GetMeshPayload", - OCC::GetMeshPayload, - "getMeshPayload", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadOnly, - }, - { - OC::StoreVertexOutput, - "StoreVertexOutput", - OCC::StoreVertexOutput, - "storeVertexOutput", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - { - OC::StorePrimitiveOutput, - "StorePrimitiveOutput", - OCC::StorePrimitiveOutput, - "storePrimitiveOutput", - 
{false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::SetMeshOutputCounts, + "SetMeshOutputCounts", + OCC::SetMeshOutputCounts, + "setMeshOutputCounts", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::EmitIndices, + "EmitIndices", + OCC::EmitIndices, + "emitIndices", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::GetMeshPayload, + "GetMeshPayload", + OCC::GetMeshPayload, + "getMeshPayload", + {false, false, false, false, false, false, false, false, false, true, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::StoreVertexOutput, + "StoreVertexOutput", + OCC::StoreVertexOutput, + "storeVertexOutput", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::StorePrimitiveOutput, + "StorePrimitiveOutput", + OCC::StorePrimitiveOutput, + "storePrimitiveOutput", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Amplification shader instructions void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::DispatchMesh, - "DispatchMesh", - OCC::DispatchMesh, - "dispatchMesh", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::None, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::DispatchMesh, + "DispatchMesh", + OCC::DispatchMesh, + "dispatchMesh", + {false, false, false, false, false, false, false, false, false, true, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + 
{{0x0}, {0x0}}}, // Sampler Feedback void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::WriteSamplerFeedback, - "WriteSamplerFeedback", - OCC::WriteSamplerFeedback, - "writeSamplerFeedback", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackBias, - "WriteSamplerFeedbackBias", - OCC::WriteSamplerFeedbackBias, - "writeSamplerFeedbackBias", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackLevel, - "WriteSamplerFeedbackLevel", - OCC::WriteSamplerFeedbackLevel, - "writeSamplerFeedbackLevel", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::WriteSamplerFeedbackGrad, - "WriteSamplerFeedbackGrad", - OCC::WriteSamplerFeedbackGrad, - "writeSamplerFeedbackGrad", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::WriteSamplerFeedback, + "WriteSamplerFeedback", + OCC::WriteSamplerFeedback, + "writeSamplerFeedback", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WriteSamplerFeedbackBias, + "WriteSamplerFeedbackBias", + OCC::WriteSamplerFeedbackBias, + "writeSamplerFeedbackBias", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WriteSamplerFeedbackLevel, + "WriteSamplerFeedbackLevel", + OCC::WriteSamplerFeedbackLevel, + "writeSamplerFeedbackLevel", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::WriteSamplerFeedbackGrad, + "WriteSamplerFeedbackGrad", + 
OCC::WriteSamplerFeedbackGrad, + "writeSamplerFeedbackGrad", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery, - "AllocateRayQuery", - OCC::AllocateRayQuery, - "allocateRayQuery", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_TraceRayInline, - "RayQuery_TraceRayInline", - OCC::RayQuery_TraceRayInline, - "rayQuery_TraceRayInline", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Proceed, - "RayQuery_Proceed", - OCC::RayQuery_Proceed, - "rayQuery_Proceed", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_Abort, - "RayQuery_Abort", - OCC::RayQuery_Abort, - "rayQuery_Abort", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitNonOpaqueTriangleHit, - "RayQuery_CommitNonOpaqueTriangleHit", - OCC::RayQuery_CommitNonOpaqueTriangleHit, - "rayQuery_CommitNonOpaqueTriangleHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommitProceduralPrimitiveHit, - "RayQuery_CommitProceduralPrimitiveHit", - OCC::RayQuery_CommitProceduralPrimitiveHit, - "rayQuery_CommitProceduralPrimitiveHit", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::RayQuery_CommittedStatus, - "RayQuery_CommittedStatus", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateType, - "RayQuery_CandidateType", - 
OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectToWorld3x4, - "RayQuery_CandidateObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateWorldToObject3x4, - "RayQuery_CandidateWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectToWorld3x4, - "RayQuery_CommittedObjectToWorld3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedWorldToObject3x4, - "RayQuery_CommittedWorldToObject3x4", - OCC::RayQuery_StateMatrix, - "rayQuery_StateMatrix", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, - "RayQuery_CandidateProceduralPrimitiveNonOpaque", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleFrontFace, - "RayQuery_CandidateTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleFrontFace, - "RayQuery_CommittedTriangleFrontFace", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleBarycentrics, - 
"RayQuery_CandidateTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedTriangleBarycentrics, - "RayQuery_CommittedTriangleBarycentrics", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayFlags, - "RayQuery_RayFlags", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayOrigin, - "RayQuery_WorldRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_WorldRayDirection, - "RayQuery_WorldRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_RayTMin, - "RayQuery_RayTMin", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateTriangleRayT, - "RayQuery_CandidateTriangleRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedRayT, - "RayQuery_CommittedRayT", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceIndex, - "RayQuery_CandidateInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, 
false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateInstanceID, - "RayQuery_CandidateInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateGeometryIndex, - "RayQuery_CandidateGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidatePrimitiveIndex, - "RayQuery_CandidatePrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayOrigin, - "RayQuery_CandidateObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CandidateObjectRayDirection, - "RayQuery_CandidateObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceIndex, - "RayQuery_CommittedInstanceIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedInstanceID, - "RayQuery_CommittedInstanceID", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedGeometryIndex, - "RayQuery_CommittedGeometryIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, 
false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedPrimitiveIndex, - "RayQuery_CommittedPrimitiveIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayOrigin, - "RayQuery_CommittedObjectRayOrigin", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::RayQuery_CommittedObjectRayDirection, - "RayQuery_CommittedObjectRayDirection", - OCC::RayQuery_StateVector, - "rayQuery_StateVector", - {false, false, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::AllocateRayQuery, + "AllocateRayQuery", + OCC::AllocateRayQuery, + "allocateRayQuery", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_TraceRayInline, + "RayQuery_TraceRayInline", + OCC::RayQuery_TraceRayInline, + "rayQuery_TraceRayInline", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_Proceed, + "RayQuery_Proceed", + OCC::RayQuery_Proceed, + "rayQuery_Proceed", + {false, false, false, false, true, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_Abort, + "RayQuery_Abort", + OCC::RayQuery_Abort, + "rayQuery_Abort", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommitNonOpaqueTriangleHit, + "RayQuery_CommitNonOpaqueTriangleHit", + OCC::RayQuery_CommitNonOpaqueTriangleHit, + 
"rayQuery_CommitNonOpaqueTriangleHit", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommitProceduralPrimitiveHit, + "RayQuery_CommitProceduralPrimitiveHit", + OCC::RayQuery_CommitProceduralPrimitiveHit, + "rayQuery_CommitProceduralPrimitiveHit", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedStatus, + "RayQuery_CommittedStatus", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateType, + "RayQuery_CandidateType", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateObjectToWorld3x4, + "RayQuery_CandidateObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateWorldToObject3x4, + "RayQuery_CandidateWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedObjectToWorld3x4, + "RayQuery_CommittedObjectToWorld3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedWorldToObject3x4, + 
"RayQuery_CommittedWorldToObject3x4", + OCC::RayQuery_StateMatrix, + "rayQuery_StateMatrix", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateProceduralPrimitiveNonOpaque, + "RayQuery_CandidateProceduralPrimitiveNonOpaque", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, true, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateTriangleFrontFace, + "RayQuery_CandidateTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, true, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedTriangleFrontFace, + "RayQuery_CommittedTriangleFrontFace", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, true, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateTriangleBarycentrics, + "RayQuery_CandidateTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedTriangleBarycentrics, + "RayQuery_CommittedTriangleBarycentrics", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_RayFlags, + "RayQuery_RayFlags", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, 
{0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_WorldRayOrigin, + "RayQuery_WorldRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_WorldRayDirection, + "RayQuery_WorldRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_RayTMin, + "RayQuery_RayTMin", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateTriangleRayT, + "RayQuery_CandidateTriangleRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedRayT, + "RayQuery_CommittedRayT", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateInstanceIndex, + "RayQuery_CandidateInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateInstanceID, + "RayQuery_CandidateInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + 
{OC::RayQuery_CandidateGeometryIndex, + "RayQuery_CandidateGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidatePrimitiveIndex, + "RayQuery_CandidatePrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateObjectRayOrigin, + "RayQuery_CandidateObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CandidateObjectRayDirection, + "RayQuery_CandidateObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedInstanceIndex, + "RayQuery_CommittedInstanceIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedInstanceID, + "RayQuery_CommittedInstanceID", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedGeometryIndex, + "RayQuery_CommittedGeometryIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, 
{0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedPrimitiveIndex, + "RayQuery_CommittedPrimitiveIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedObjectRayOrigin, + "RayQuery_CommittedObjectRayOrigin", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedObjectRayDirection, + "RayQuery_CommittedObjectRayDirection", + OCC::RayQuery_StateVector, + "rayQuery_StateVector", + {false, false, true, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Raytracing object space uint System Values, raytracing tier 1.1 void, h, - // f, d, i1, i8, i16, i32, i64, udt, obj , function - // attribute - { - OC::GeometryIndex, - "GeometryIndex", - OCC::GeometryIndex, - "geometryIndex", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // f, d, i1, i8, i16, i32, i64, udt, obj, vec, + // function attribute, ext oload, vec oload + {OC::GeometryIndex, + "GeometryIndex", + OCC::GeometryIndex, + "geometryIndex", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, - "RayQuery_CandidateInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, - { - 
OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, - "RayQuery_CommittedInstanceContributionToHitGroupIndex", - OCC::RayQuery_StateScalar, - "rayQuery_StateScalar", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::RayQuery_CandidateInstanceContributionToHitGroupIndex, + "RayQuery_CandidateInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RayQuery_CommittedInstanceContributionToHitGroupIndex, + "RayQuery_CommittedInstanceContributionToHitGroupIndex", + OCC::RayQuery_StateScalar, + "rayQuery_StateScalar", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Get handle from heap void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AnnotateHandle, - "AnnotateHandle", - OCC::AnnotateHandle, - "annotateHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromBinding, - "CreateHandleFromBinding", - OCC::CreateHandleFromBinding, - "createHandleFromBinding", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateHandleFromHeap, - "CreateHandleFromHeap", - OCC::CreateHandleFromHeap, - "createHandleFromHeap", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::AnnotateHandle, + "AnnotateHandle", + OCC::AnnotateHandle, + "annotateHandle", + {true, false, false, false, false, false, false, false, false, false, + false, 
false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CreateHandleFromBinding, + "CreateHandleFromBinding", + OCC::CreateHandleFromBinding, + "createHandleFromBinding", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CreateHandleFromHeap, + "CreateHandleFromHeap", + OCC::CreateHandleFromHeap, + "createHandleFromHeap", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Unpacking intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Unpack4x8, - "Unpack4x8", - OCC::Unpack4x8, - "unpack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::Unpack4x8, + "Unpack4x8", + OCC::Unpack4x8, + "unpack4x8", + {false, false, false, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Packing intrinsics void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::Pack4x8, - "Pack4x8", - OCC::Pack4x8, - "pack4x8", - {false, false, false, false, false, false, true, true, false, false, - false}, - Attribute::ReadNone, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::Pack4x8, + "Pack4x8", + OCC::Pack4x8, + "pack4x8", + {false, false, false, false, false, false, true, true, false, false, false, + false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Helper Lanes void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::IsHelperLane, - "IsHelperLane", - OCC::IsHelperLane, - "isHelperLane", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::ReadOnly, 
- }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::IsHelperLane, + "IsHelperLane", + OCC::IsHelperLane, + "isHelperLane", + {false, false, false, false, true, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Quad Wave Ops void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::QuadVote, - "QuadVote", - OCC::QuadVote, - "quadVote", - {false, false, false, false, true, false, false, false, false, false, - false}, - Attribute::None, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::QuadVote, + "QuadVote", + OCC::QuadVote, + "quadVote", + {false, false, false, false, true, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources - gather void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::TextureGatherRaw, - "TextureGatherRaw", - OCC::TextureGatherRaw, - "textureGatherRaw", - {false, false, false, false, false, false, true, true, true, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::TextureGatherRaw, + "TextureGatherRaw", + OCC::TextureGatherRaw, + "textureGatherRaw", + {false, false, false, false, false, false, true, true, true, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources - sample void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpLevel, - "SampleCmpLevel", - OCC::SampleCmpLevel, - "sampleCmpLevel", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::SampleCmpLevel, + "SampleCmpLevel", + OCC::SampleCmpLevel, + "sampleCmpLevel", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + 
Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Resources void, h, f, d, i1, i8, i16, i32, i64, - // udt, obj , function attribute - { - OC::TextureStoreSample, - "TextureStoreSample", - OCC::TextureStoreSample, - "textureStoreSample", - {false, true, true, false, false, false, true, true, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::Reserved0, - "Reserved0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved1, - "Reserved1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved2, - "Reserved2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved3, - "Reserved3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved4, - "Reserved4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved5, - "Reserved5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved6, - "Reserved6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved7, - "Reserved7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved8, - "Reserved8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - 
OC::Reserved9, - "Reserved9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved10, - "Reserved10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::Reserved11, - "Reserved11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // udt, obj, vec, function attribute, ext oload, vec oload + {OC::TextureStoreSample, + "TextureStoreSample", + OCC::TextureStoreSample, + "textureStoreSample", + {false, true, true, false, false, false, true, true, false, false, false, + false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + + // void, h, f, d, i1, i8, i16, i32, i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::Reserved0, + "Reserved0", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved1, + "Reserved1", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved2, + "Reserved2", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved3, + "Reserved3", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved4, + "Reserved4", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + 
{{0x0}, {0x0}}}, + {OC::Reserved5, + "Reserved5", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved6, + "Reserved6", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved7, + "Reserved7", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved8, + "Reserved8", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved9, + "Reserved9", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved10, + "Reserved10", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::Reserved11, + "Reserved11", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::AllocateNodeOutputRecords, - "AllocateNodeOutputRecords", - OCC::AllocateNodeOutputRecords, - "allocateNodeOutputRecords", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::AllocateNodeOutputRecords, + 
"AllocateNodeOutputRecords", + OCC::AllocateNodeOutputRecords, + "allocateNodeOutputRecords", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Get Pointer to Node Record in Address Space 6 void, h, f, d, - // i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::GetNodeRecordPtr, - "GetNodeRecordPtr", - OCC::GetNodeRecordPtr, - "getNodeRecordPtr", - {false, false, false, false, false, false, false, false, false, true, - false}, - Attribute::ReadNone, - }, + // i1, i8, i16, i32, i64, udt, obj, vec, function attribute, + // ext oload, vec oload + {OC::GetNodeRecordPtr, + "GetNodeRecordPtr", + OCC::GetNodeRecordPtr, + "getNodeRecordPtr", + {false, false, false, false, false, false, false, false, false, true, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::IncrementOutputCount, - "IncrementOutputCount", - OCC::IncrementOutputCount, - "incrementOutputCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::OutputComplete, - "OutputComplete", - OCC::OutputComplete, - "outputComplete", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::GetInputRecordCount, - "GetInputRecordCount", - OCC::GetInputRecordCount, - "getInputRecordCount", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::FinishedCrossGroupSharing, - "FinishedCrossGroupSharing", - OCC::FinishedCrossGroupSharing, - "finishedCrossGroupSharing", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // i32, i64, udt, obj, vec, function attribute, ext oload, vec + // oload + {OC::IncrementOutputCount, + 
"IncrementOutputCount", + OCC::IncrementOutputCount, + "incrementOutputCount", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::OutputComplete, + "OutputComplete", + OCC::OutputComplete, + "outputComplete", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::GetInputRecordCount, + "GetInputRecordCount", + OCC::GetInputRecordCount, + "getInputRecordCount", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::FinishedCrossGroupSharing, + "FinishedCrossGroupSharing", + OCC::FinishedCrossGroupSharing, + "finishedCrossGroupSharing", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Synchronization void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::BarrierByMemoryType, - "BarrierByMemoryType", - OCC::BarrierByMemoryType, - "barrierByMemoryType", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByMemoryHandle, - "BarrierByMemoryHandle", - OCC::BarrierByMemoryHandle, - "barrierByMemoryHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, - { - OC::BarrierByNodeRecordHandle, - "BarrierByNodeRecordHandle", - OCC::BarrierByNodeRecordHandle, - "barrierByNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::NoDuplicate, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::BarrierByMemoryType, + "BarrierByMemoryType", + OCC::BarrierByMemoryType, + "barrierByMemoryType", + {true, false, false, 
false, false, false, false, false, false, false, + false, false, false}, + Attribute::NoDuplicate, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BarrierByMemoryHandle, + "BarrierByMemoryHandle", + OCC::BarrierByMemoryHandle, + "barrierByMemoryHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::NoDuplicate, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::BarrierByNodeRecordHandle, + "BarrierByNodeRecordHandle", + OCC::BarrierByNodeRecordHandle, + "barrierByNodeRecordHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::NoDuplicate, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Create/Annotate Node Handles void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::CreateNodeOutputHandle, - "CreateNodeOutputHandle", - OCC::createNodeOutputHandle, - "createNodeOutputHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::IndexNodeHandle, - "IndexNodeHandle", - OCC::IndexNodeHandle, - "indexNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeHandle, - "AnnotateNodeHandle", - OCC::AnnotateNodeHandle, - "annotateNodeHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::CreateNodeInputRecordHandle, - "CreateNodeInputRecordHandle", - OCC::CreateNodeInputRecordHandle, - "createNodeInputRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::AnnotateNodeRecordHandle, - "AnnotateNodeRecordHandle", - OCC::AnnotateNodeRecordHandle, - "annotateNodeRecordHandle", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, 
ext oload, + // vec oload + {OC::CreateNodeOutputHandle, + "CreateNodeOutputHandle", + OCC::createNodeOutputHandle, + "createNodeOutputHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::IndexNodeHandle, + "IndexNodeHandle", + OCC::IndexNodeHandle, + "indexNodeHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::AnnotateNodeHandle, + "AnnotateNodeHandle", + OCC::AnnotateNodeHandle, + "annotateNodeHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::CreateNodeInputRecordHandle, + "CreateNodeInputRecordHandle", + OCC::CreateNodeInputRecordHandle, + "createNodeInputRecordHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::AnnotateNodeRecordHandle, + "AnnotateNodeRecordHandle", + OCC::AnnotateNodeRecordHandle, + "annotateNodeRecordHandle", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Work Graph intrinsics void, h, f, d, i1, i8, i16, - // i32, i64, udt, obj , function attribute - { - OC::NodeOutputIsValid, - "NodeOutputIsValid", - OCC::NodeOutputIsValid, - "nodeOutputIsValid", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::GetRemainingRecursionLevels, - "GetRemainingRecursionLevels", - OCC::GetRemainingRecursionLevels, - "getRemainingRecursionLevels", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // i32, i64, udt, obj, vec, function attribute, ext 
oload, vec + // oload + {OC::NodeOutputIsValid, + "NodeOutputIsValid", + OCC::NodeOutputIsValid, + "nodeOutputIsValid", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::GetRemainingRecursionLevels, + "GetRemainingRecursionLevels", + OCC::GetRemainingRecursionLevels, + "getRemainingRecursionLevels", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Comparison Samples void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::SampleCmpGrad, - "SampleCmpGrad", - OCC::SampleCmpGrad, - "sampleCmpGrad", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, - { - OC::SampleCmpBias, - "SampleCmpBias", - OCC::SampleCmpBias, - "sampleCmpBias", - {false, true, true, false, false, false, false, false, false, false, - false}, - Attribute::ReadOnly, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::SampleCmpGrad, + "SampleCmpGrad", + OCC::SampleCmpGrad, + "sampleCmpGrad", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::SampleCmpBias, + "SampleCmpBias", + OCC::SampleCmpBias, + "sampleCmpBias", + {false, true, true, false, false, false, false, false, false, false, false, + false, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Extended Command Information void, h, f, d, i1, i8, - // i16, i32, i64, udt, obj , function attribute - { - OC::StartVertexLocation, - "StartVertexLocation", - OCC::StartVertexLocation, - "startVertexLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, - { - OC::StartInstanceLocation, - "StartInstanceLocation", - 
OCC::StartInstanceLocation, - "startInstanceLocation", - {false, false, false, false, false, false, false, true, false, false, - false}, - Attribute::ReadNone, - }, + // i16, i32, i64, udt, obj, vec, function attribute, ext oload, + // vec oload + {OC::StartVertexLocation, + "StartVertexLocation", + OCC::StartVertexLocation, + "startVertexLocation", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::StartInstanceLocation, + "StartInstanceLocation", + OCC::StartInstanceLocation, + "startInstanceLocation", + {false, false, false, false, false, false, false, true, false, false, + false, false, false}, + Attribute::ReadNone, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, // Inline Ray Query void, h, f, d, i1, i8, i16, i32, - // i64, udt, obj , function attribute - { - OC::AllocateRayQuery2, - "AllocateRayQuery2", - OCC::AllocateRayQuery2, - "allocateRayQuery2", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - - // void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute - { - OC::ReservedA0, - "ReservedA0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA1, - "ReservedA1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedA2, - "ReservedA2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB0, - "ReservedB0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB1, - "ReservedB1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - 
false}, - Attribute::None, - }, - { - OC::ReservedB2, - "ReservedB2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB3, - "ReservedB3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB4, - "ReservedB4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB5, - "ReservedB5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB6, - "ReservedB6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB7, - "ReservedB7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB8, - "ReservedB8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB9, - "ReservedB9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB10, - "ReservedB10", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB11, - "ReservedB11", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB12, - "ReservedB12", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB13, - 
"ReservedB13", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB14, - "ReservedB14", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB15, - "ReservedB15", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB16, - "ReservedB16", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB17, - "ReservedB17", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB18, - "ReservedB18", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB19, - "ReservedB19", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB20, - "ReservedB20", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB21, - "ReservedB21", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB22, - "ReservedB22", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB23, - "ReservedB23", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB24, - "ReservedB24", - OCC::Reserved, - "reserved", - 
{true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB25, - "ReservedB25", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB26, - "ReservedB26", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB27, - "ReservedB27", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB28, - "ReservedB28", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB29, - "ReservedB29", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedB30, - "ReservedB30", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC0, - "ReservedC0", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC1, - "ReservedC1", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC2, - "ReservedC2", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC3, - "ReservedC3", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC4, - "ReservedC4", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, 
false, - false}, - Attribute::None, - }, - { - OC::ReservedC5, - "ReservedC5", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC6, - "ReservedC6", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC7, - "ReservedC7", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC8, - "ReservedC8", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, - { - OC::ReservedC9, - "ReservedC9", - OCC::Reserved, - "reserved", - {true, false, false, false, false, false, false, false, false, false, - false}, - Attribute::None, - }, + // i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::AllocateRayQuery2, + "AllocateRayQuery2", + OCC::AllocateRayQuery2, + "allocateRayQuery2", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + + // void, h, f, d, i1, i8, i16, i32, i64, udt, obj, vec, function attribute, ext oload, vec oload + {OC::ReservedA0, + "ReservedA0", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedA1, + "ReservedA1", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedA2, + "ReservedA2", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + 
{OC::ReservedB0, + "ReservedB0", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB1, + "ReservedB1", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB2, + "ReservedB2", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB3, + "ReservedB3", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB4, + "ReservedB4", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB5, + "ReservedB5", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB6, + "ReservedB6", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB7, + "ReservedB7", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB8, + "ReservedB8", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB9, + "ReservedB9", + 
OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB10, + "ReservedB10", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB11, + "ReservedB11", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB12, + "ReservedB12", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB13, + "ReservedB13", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB14, + "ReservedB14", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB15, + "ReservedB15", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB16, + "ReservedB16", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB17, + "ReservedB17", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB18, + "ReservedB18", + OCC::Reserved, + 
"reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB19, + "ReservedB19", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB20, + "ReservedB20", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB21, + "ReservedB21", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB22, + "ReservedB22", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB23, + "ReservedB23", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB24, + "ReservedB24", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB25, + "ReservedB25", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB26, + "ReservedB26", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB27, + "ReservedB27", + OCC::Reserved, + "reserved", + 
{true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB28, + "ReservedB28", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB29, + "ReservedB29", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedB30, + "ReservedB30", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC0, + "ReservedC0", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC1, + "ReservedC1", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC2, + "ReservedC2", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC3, + "ReservedC3", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC4, + "ReservedC4", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC5, + "ReservedC5", + OCC::Reserved, + "reserved", + {true, false, false, false, 
false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC6, + "ReservedC6", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC7, + "ReservedC7", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC8, + "ReservedC8", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::ReservedC9, + "ReservedC9", + OCC::Reserved, + "reserved", + {true, false, false, false, false, false, false, false, false, false, + false, false, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x0}, {0x0}}}, + {OC::RawBufferVectorLoad, + "RawBufferVectorLoad", + OCC::RawBufferVectorLoad, + "rawBufferVectorLoad", + {false, true, true, true, false, false, true, true, true, false, false, + true, false}, + Attribute::ReadOnly, + {{0x0}, {0x0}}, + {{0x1ce}, {0x0}}}, + {OC::RawBufferVectorStore, + "RawBufferVectorStore", + OCC::RawBufferVectorStore, + "rawBufferVectorStore", + {false, true, true, true, false, false, true, true, true, false, false, + true, false}, + Attribute::None, + {{0x0}, {0x0}}, + {{0x1ce}, {0x0}}}, }; // OPCODE-OLOADS:END const char *OP::m_OverloadTypeName[kNumTypeOverloads] = { - "void", "f16", "f32", "f64", "i1", "i8", - "i16", "i32", "i64", "udt", "obj", // These should not be used + "void", "f16", "f32", "f64", "i1", "i8", "i16", + "i32", "i64", "udt", "obj", "vec", "ext", + // "udt", "obj", "vec", and "ext" should not be used }; const char *OP::m_NamePrefix = "dx.op."; @@ -3067,7 +3126,14 @@ unsigned OP::GetTypeSlot(Type *pType) { return GetTypeSlot(pType); } case 
Type::StructTyID: - return kObjectTypeSlot; + // Named struct value (not pointer) indicates a built-in object type. + // Anonymous struct value is used to wrap multi-overload dimensions. + if (cast(pType)->hasName()) + return kObjectTypeSlot; + else + return kExtendedTypeSlot; + case Type::VectorTyID: + return kVectorTypeSlot; default: break; } @@ -3091,6 +3157,26 @@ llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) { } else if (TypeSlot == kObjectTypeSlot) { StructType *ST = cast(Ty); return ST->getStructName(); + } else if (TypeSlot == kVectorTypeSlot) { + VectorType *VecTy = cast(Ty); + str = "v"; + str += std::to_string(VecTy->getNumElements()); + str += GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())); + return str; + } else if (TypeSlot == kExtendedTypeSlot) { + DXASSERT(isa(Ty), + "otherwise, extended overload type not wrapped in struct type."); + StructType *ST = cast(Ty); + DXASSERT(ST->getNumElements() <= DXIL::kDxilMaxOloadDims, + "otherwise, extended overload has too many dimensions."); + // Iterate extended slots, recurse, separate with '.' 
+ for (unsigned I = 0; I < ST->getNumElements(); ++I) { + if (I > 0) + str += "."; + std::string TempStr; + str += GetTypeName(ST->getElementType(I), TempStr); + } + return str; } else { raw_string_ostream os(str); Ty->print(os); @@ -3138,13 +3224,43 @@ llvm::Attribute::AttrKind OP::GetMemAccessAttr(OpCode opCode) { } bool OP::IsOverloadLegal(OpCode opCode, Type *pType) { + auto &OpProps = m_OpCodeProps[static_cast(opCode)]; if (!pType) return false; if (opCode == OpCode::NumOpCodes) return false; unsigned TypeSlot = GetTypeSlot(pType); - return TypeSlot != UINT_MAX && - m_OpCodeProps[(unsigned)opCode].bAllowOverload[TypeSlot]; + if (TypeSlot >= kNumTypeOverloads) + return false; + if (!OpProps.bAllowOverload[TypeSlot]) + return false; + + if (TypeSlot == kVectorTypeSlot) { + unsigned EltTypeSlot = + GetTypeSlot(cast(pType)->getElementType()); + return OpProps.AllowedVectorElements[0][EltTypeSlot]; + } + + if (TypeSlot == kExtendedTypeSlot) { + StructType *ST = cast(pType); + if (ST->getNumElements() < 2 || + ST->getNumElements() > DXIL::kDxilMaxOloadDims) + return false; + for (unsigned I = 0; I < ST->getNumElements(); ++I) { + Type *ElTy = ST->getElementType(I); + unsigned OloadSlot = GetTypeSlot(ElTy); + if (!OpProps.ExtendedOverloads[I][OloadSlot]) + return false; + if (OloadSlot == kVectorTypeSlot) { + unsigned EltTypeSlot = + GetTypeSlot(cast(ElTy)->getElementType()); + if (!OpProps.AllowedVectorElements[I][EltTypeSlot]) + return false; + } + } + return true; + } + return true; } bool OP::CheckOpCodeTable() { @@ -3292,6 +3408,13 @@ bool OP::IsDxilOpBarrier(OpCode C) { // OPCODE-BARRIER:END } +bool OP::IsDxilOpExtendedOverload(OpCode C) { + if (C >= OpCode::NumOpCodes) + return false; + return m_OpCodeProps[static_cast(C)] + .bAllowOverload[kExtendedTypeSlot]; +} + static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags, unsigned allowedMask) { // If the memory type is AllMemory, masking inapplicable flags is allowed. 
@@ -3930,6 +4053,8 @@ void OP::FixOverloadNames() { if (F.isDeclaration() && OP::IsDxilOpFunc(&F) && !F.user_empty()) { CallInst *CI = cast(*F.user_begin()); DXIL::OpCode opCode = OP::GetDxilOpFuncCallInst(CI); + if (IsDxilOpExtendedOverload(opCode)) + continue; llvm::Type *Ty = OP::GetOverloadType(opCode, &F); if (!OP::IsOverloadLegal(opCode, Ty)) continue; @@ -3949,11 +4074,31 @@ void OP::UpdateCache(OpCodeClass opClass, Type *Ty, llvm::Function *F) { m_FunctionToOpClass[F] = opClass; } +Function *OP::GetOpFunc(OpCode opCode, ArrayRef ExtendedOverloads) { + DXASSERT(IsDxilOpExtendedOverload(opCode), + "otherwise, Dxil Op does not support extended overload"); + return GetOpFunc(opCode, GetExtendedOverloadType(ExtendedOverloads)); +} + Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { if (opCode == OpCode::NumOpCodes) return nullptr; if (!pOverloadType) return nullptr; + if (IsDxilOpExtendedOverload(opCode)) { + StructType *ST = dyn_cast(pOverloadType); + DXASSERT(ST != nullptr, + "otherwise, extended overload type is not a struct"); + if (ST == nullptr) + return nullptr; + bool EltCountValid = ST->getNumElements() > 1 && + ST->getNumElements() <= DXIL::kDxilMaxOloadDims; + DXASSERT(EltCountValid, + "otherwise, invalid type count for extended overload."); + if (!EltCountValid) + return nullptr; + } + // Illegal overloads are generated and eliminated by DXIL op constant // evaluation for a number of cases where a double overload of an HL intrinsic // that otherwise does not support double is used for literal values, when @@ -4006,6 +4151,9 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { #define CBRT(_y) A(GetCBufferRetType(_y)) #define VEC4(_y) A(GetVectorType(4, _y)) +// Extended Overload types are wrapped in an anonymous struct +#define EXT(_y) A(cast(pOverloadType)->getElementType(_y)) + /* hctdb_instrhelp.get_oloads_funcs()*/ switch (opCode) { // return opCode // OPCODE-OLOAD-FUNCS:BEGIN @@ -6023,6 +6171,23 @@ Function 
*OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { A(pV); A(pI32); break; + case OpCode::RawBufferVectorLoad: + RRT(pETy); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pI32); + break; + case OpCode::RawBufferVectorStore: + A(pV); + A(pI32); + A(pRes); + A(pI32); + A(pI32); + A(pETy); + A(pI32); + break; // OPCODE-OLOAD-FUNCS:END default: DXASSERT(false, "otherwise unhandled case"); @@ -6171,6 +6336,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::StoreVertexOutput: case OpCode::StorePrimitiveOutput: case OpCode::DispatchMesh: + case OpCode::RawBufferVectorStore: if (FT->getNumParams() <= 4) return nullptr; return FT->getParamType(4); @@ -6417,7 +6583,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TextureGatherRaw: case OpCode::SampleCmpLevel: case OpCode::SampleCmpGrad: - case OpCode::SampleCmpBias: { + case OpCode::SampleCmpBias: + case OpCode::RawBufferVectorLoad: { StructType *ST = cast(Ty); return ST->getElementType(0); } @@ -6472,7 +6639,15 @@ bool OP::IsResRetType(llvm::Type *Ty) { Type *OP::GetResRetType(Type *pOverloadType) { unsigned TypeSlot = GetTypeSlot(pOverloadType); - if (m_pResRetType[TypeSlot] == nullptr) { + if (TypeSlot == kVectorTypeSlot) { + string TypeName("dx.types.ResRet."); + VectorType *VecTy = cast(pOverloadType); + TypeName += "v"; + TypeName += std::to_string(VecTy->getNumElements()); + TypeName += GetOverloadTypeName(OP::GetTypeSlot(VecTy->getElementType())); + Type *FieldTypes[2] = {pOverloadType, Type::getInt32Ty(m_Ctx)}; + return GetOrCreateStructType(m_Ctx, FieldTypes, TypeName, m_pModule); + } else if (m_pResRetType[TypeSlot] == nullptr) { string TypeName("dx.types.ResRet."); TypeName += GetOverloadTypeName(TypeSlot); Type *FieldTypes[5] = {pOverloadType, pOverloadType, pOverloadType, @@ -6529,6 +6704,10 @@ Type *OP::GetVectorType(unsigned numElements, Type *pOverloadType) { return nullptr; } +StructType *OP::GetExtendedOverloadType(ArrayRef 
OverloadTypes) { + return StructType::get(m_Ctx, OverloadTypes); +} + //------------------------------------------------------------------------------ // // LLVM utility methods. diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 757a0bc3ee..f6ffd7f7e2 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -426,35 +426,37 @@ GetHLSLResourceProperties(llvm::Type *Ty) { false, false, false)); if (name == "SamplerComparisonState") - return RetType( - true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Sampler, false, - false, /*cmp or counter*/ true)); + return RetType(true, MakeResourceProperties( + hlsl::DXIL::ResourceKind::Sampler, /*UAV*/ false, + /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("AppendStructuredBuffer<")) - return RetType(true, MakeResourceProperties( - hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + return RetType(true, + MakeResourceProperties( + hlsl::DXIL::ResourceKind::StructuredBuffer, + /*UAV*/ true, /*ROV*/ false, /*cmp or counter*/ true)); if (name.startswith("ConsumeStructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, - false, false, /*cmp or counter*/ true)); + /*UAV*/ false, /*ROV*/ false, + /*cmp or counter*/ true)); if (name == "RaytracingAccelerationStructure") return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::RTAccelerationStructure, - false, false, false)); + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("ConstantBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, - false, false, false)); + return RetType( + true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (name.startswith("TextureBuffer<")) - return RetType(true, - MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, - false, false, false)); + return RetType( + true, 
MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, + /*UAV*/ false, /*ROV*/ false, false)); if (ConsumePrefix(name, "FeedbackTexture2D")) { hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid; @@ -464,7 +466,9 @@ GetHLSLResourceProperties(llvm::Type *Ty) { kind = hlsl::DXIL::ResourceKind::FeedbackTexture2D; if (name.startswith("<")) - return RetType(true, MakeResourceProperties(kind, false, false, false)); + return RetType(true, + MakeResourceProperties(kind, /*UAV*/ false, + /*ROV*/ false, /*Cmp*/ false)); return FalseRet; } @@ -475,63 +479,63 @@ GetHLSLResourceProperties(llvm::Type *Ty) { if (name == "ByteAddressBuffer") return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::RawBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("Buffer<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TypedBuffer, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("StructuredBuffer<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::StructuredBuffer, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (ConsumePrefix(name, "Texture")) { if (name.startswith("1D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture1D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("1DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture1DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("2D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DArray, UAV, - ROV, false)); + ROV, /*Cmp*/ false)); if (name.startswith("3D<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture3D, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if 
(name.startswith("Cube<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TextureCube, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("CubeArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::TextureCubeArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMS<")) return RetType( true, MakeResourceProperties(hlsl::DXIL::ResourceKind::Texture2DMS, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); if (name.startswith("2DMSArray<")) return RetType(true, MakeResourceProperties( hlsl::DXIL::ResourceKind::Texture2DMSArray, - UAV, ROV, false)); + UAV, ROV, /*Cmp*/ false)); return FalseRet; } } diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 0a2001a745..9c93b70cce 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2037,7 +2037,7 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrError(CI, ValidationRule::InstrOload); continue; } - dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty->getScalarType()); + dxilFunc = hlslOP->GetOpFunc(dxilOpcode, Ty); } if (!dxilFunc) { @@ -2109,17 +2109,20 @@ static bool IsDxilBuiltinStructType(StructType *ST, hlsl::OP *hlslOP) { return true; unsigned EltNum = ST->getNumElements(); + Type *EltTy = ST->getElementType(0); switch (EltNum) { case 2: + // Check if it's a native vector resret. + if (EltTy->isVectorTy()) + return ST == hlslOP->GetResRetType(EltTy); + LLVM_FALLTHROUGH; case 4: - case 8: { // 2 for doubles, 8 for halfs. - Type *EltTy = ST->getElementType(0); + case 8: // 2 for doubles, 8 for halfs. 
return ST == hlslOP->GetCBufferRetType(EltTy); - } break; - case 5: { - Type *EltTy = ST->getElementType(0); + break; + case 5: return ST == hlslOP->GetResRetType(EltTy); - } break; + break; default: return false; } @@ -2193,6 +2196,8 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, return true; if (Ty->isVectorTy()) { + if (ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + return true; ValCtx.EmitTypeError(Ty, ValidationRule::TypesNoVector); return false; } diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index 68c83fc037..ca343662ab 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1255,6 +1255,12 @@ void DxilLinkJob::RunPreparePass(Module &M) { // For static global handle. PM.add(createLowerStaticGlobalIntoAlloca()); + // Change dynamic indexing vector to array where vectors aren't + // supported, but might be there from the initial compile. + if (!pSM->IsSM69Plus()) + PM.add( + createDynamicIndexingVectorToArrayPass(false /* ReplaceAllVector */)); + // Remove MultiDimArray from function call arg. PM.add(createMultiDimArrayToOneDimArrayPass()); diff --git a/lib/HLSL/HLMatrixBitcastLowerPass.cpp b/lib/HLSL/HLMatrixBitcastLowerPass.cpp index 93ba3b9816..b708293fca 100644 --- a/lib/HLSL/HLMatrixBitcastLowerPass.cpp +++ b/lib/HLSL/HLMatrixBitcastLowerPass.cpp @@ -113,13 +113,13 @@ class MatrixBitcastLowerPass : public FunctionPass { // Lower matrix first. 
for (BitCastInst *BCI : matCastSet) { - lowerMatrix(BCI, BCI->getOperand(0)); + lowerMatrix(DM, BCI, BCI->getOperand(0)); } return bUpdated; } private: - void lowerMatrix(Instruction *M, Value *A); + void lowerMatrix(DxilModule &DM, Instruction *M, Value *A); bool hasCallUser(Instruction *M); }; @@ -180,7 +180,8 @@ Value *CreateEltGEP(Value *A, unsigned i, Value *zeroIdx, } } // namespace -void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { +void MatrixBitcastLowerPass::lowerMatrix(DxilModule &DM, Instruction *M, + Value *A) { for (auto it = M->user_begin(); it != M->user_end();) { User *U = *(it++); if (GetElementPtrInst *GEP = dyn_cast(U)) { @@ -193,31 +194,42 @@ void MatrixBitcastLowerPass::lowerMatrix(Instruction *M, Value *A) { SmallVector idxList(GEP->idx_begin(), GEP->idx_end()); DXASSERT(idxList.size() == 2, "else not one dim matrix array index to matrix"); - - HLMatrixType MatTy = HLMatrixType::cast(EltTy); - Value *matSize = Builder.getInt32(MatTy.getNumElements()); - idxList.back() = Builder.CreateMul(idxList.back(), matSize); + if (!DM.GetShaderModel()->IsSM69Plus()) { + HLMatrixType MatTy = HLMatrixType::cast(EltTy); + Value *matSize = Builder.getInt32(MatTy.getNumElements()); + idxList.back() = Builder.CreateMul(idxList.back(), matSize); + } Value *NewGEP = Builder.CreateGEP(A, idxList); - lowerMatrix(GEP, NewGEP); + lowerMatrix(DM, GEP, NewGEP); DXASSERT(GEP->user_empty(), "else lower matrix fail"); GEP->eraseFromParent(); } else { DXASSERT(0, "invalid GEP for matrix"); } } else if (BitCastInst *BCI = dyn_cast(U)) { - lowerMatrix(BCI, A); + lowerMatrix(DM, BCI, A); DXASSERT(BCI->user_empty(), "else lower matrix fail"); BCI->eraseFromParent(); } else if (LoadInst *LI = dyn_cast(U)) { if (VectorType *Ty = dyn_cast(LI->getType())) { IRBuilder<> Builder(LI); - Value *zeroIdx = Builder.getInt32(0); - unsigned vecSize = Ty->getNumElements(); - Value *NewVec = UndefValue::get(LI->getType()); - for (unsigned i = 0; i < vecSize; i++) { - 
Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); - Value *Elt = Builder.CreateLoad(GEP); - NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + Value *NewVec = nullptr; + if (DM.GetShaderModel()->IsSM69Plus()) { + // Just create a replacement load using the vector pointer. + Instruction *NewLI = LI->clone(); + unsigned VecIdx = NewLI->getNumOperands() - 1; + NewLI->setOperand(VecIdx, A); + Builder.Insert(NewLI); + NewVec = NewLI; + } else { + Value *zeroIdx = Builder.getInt32(0); + unsigned vecSize = Ty->getNumElements(); + NewVec = UndefValue::get(LI->getType()); + for (unsigned i = 0; i < vecSize; i++) { + Value *GEP = CreateEltGEP(A, i, zeroIdx, Builder); + Value *Elt = Builder.CreateLoad(GEP); + NewVec = Builder.CreateInsertElement(NewVec, Elt, i); + } } LI->replaceAllUsesWith(NewVec); LI->eraseFromParent(); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index bc293357d6..d218136bd7 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -481,6 +481,44 @@ Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef refArgs, return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); } + +Value *TrivialDxilVectorOperation(Function *dxilFunc, OP::OpCode opcode, + ArrayRef refArgs, Type *Ty, + OP *hlslOP, IRBuilder<> &Builder) { + if (!Ty->isVoidTy()) { + Value *retVal = + Builder.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode)); + return retVal; + } else { + // Cannot add name to void. 
+ return Builder.CreateCall(dxilFunc, refArgs); + } +} + + +Value *TrivialDxilVectorUnaryOperationRet(OP::OpCode opcode, Value *src, Type *Ty, + OP *hlslOP, IRBuilder<> &Builder) { + + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + Value *args[] = {opArg, src}; + + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); +} + +Value *TrivialDxilVectorBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, + hlsl::OP *hlslOP, IRBuilder<> &Builder) { + Type *Ty = src0->getType(); + + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + Value *args[] = {opArg, src0, src1}; + + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + + return TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); +} + Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy, hlsl::OP *hlslOP, IRBuilder<> &Builder) { Type *Ty = src->getType(); @@ -507,17 +545,26 @@ Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } -Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, - Value *src2, hlsl::OP *hlslOP, - IRBuilder<> &Builder) { - Type *Ty = src0->getType(); - +Value *TrivialDxilTrinaryOperationRet(OP::OpCode opcode, Value *src0, Value *src1, + Value *src2, Type *Ty, hlsl::OP *hlslOP, + IRBuilder<> &Builder) { Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {opArg, src0, src1, src2}; return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); } +Value *TrivialDxilVectorTrinaryOperationRet(OP::OpCode opcode, Value *src0, Value *src1, + Value *src2, Type *Ty, hlsl::OP *hlslOP, + IRBuilder<> &Builder) { + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + Value *args[] = {opArg, src0, src1, src2}; + + Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); + + return 
TrivialDxilVectorOperation(dxilFunc, opcode, args, Ty, hlslOP, Builder); +} + Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -530,6 +577,24 @@ Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return retVal; } +Value *TrivialVectorizableUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); + Type *Ty = CI->getType(); + IRBuilder<> Builder(CI); + hlsl::OP *hlslOP = &helper.hlslOP; + + if (Ty->isVectorTy() && + helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorUnaryOperationRet(opcode, src0, Ty, + hlslOP, Builder); + else + return TrivialDxilUnaryOperationRet(opcode, src0, Ty, + hlslOP, Builder); +} + Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -544,19 +609,36 @@ Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return binOp; } -Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { +Value *TrivialVectorBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; + Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); + Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); + IRBuilder<> Builder(CI); + + Value *binOp = + TrivialDxilVectorBinaryOperation(opcode, src0, src1, hlslOP, Builder); + return binOp; +} + +Value *TranslateFMA(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, 
+ HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *hlslOP = &helper.hlslOP; + Type *Ty = CI->getType(); Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); IRBuilder<> Builder(CI); - Value *triOp = - TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder); - return triOp; + if (Ty->isVectorTy() && + helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); + else + return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); } Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -735,6 +817,12 @@ bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, } } + // Only apply on aggregates of 16 or fewer elements, + // representing the max 4x4 matrix size. + Type *xTy = x->getType(); + if (xTy->isVectorTy() && xTy->getVectorNumElements() > 16) + return false; + APFloat powAPF = isa(pow) ? cast(pow)->getElementAsAPFloat(0) : // should be a splat value @@ -1896,9 +1984,16 @@ Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, IRBuilder<> Builder(CI); // min(max(x, minVal), maxVal). 
- Value *maxXMinVal = + if (Ty->isVectorTy() && + helper.M.GetShaderModel()->IsSM69Plus()) { + Value *maxXMinVal = + TrivialDxilVectorBinaryOperation(maxOp, x, minVal, hlslOP, Builder); + return TrivialDxilVectorBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + } else { + Value *maxXMinVal = TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); - return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); + } } Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2211,8 +2306,11 @@ Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); } val = Builder.CreateFMul(log2eConst, val); - Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder); - return exp; + if (Ty->isVectorTy() && + helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialDxilVectorUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, Builder); + else + return TrivialDxilUnaryOperationRet(OP::OpCode::Exp, val, Ty, hlslOP, Builder); } Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2227,7 +2325,12 @@ Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (Ty != Ty->getScalarType()) { ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); } - Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); + Value *log = nullptr; + if (Ty->isVectorTy() && + helper.M.GetShaderModel()->IsSM69Plus()) + log = TrivialDxilVectorUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); + else + log = TrivialDxilUnaryOperationRet(OP::OpCode::Log, val, Ty, hlslOP, Builder); return Builder.CreateFMul(ln2Const, log); } @@ -2287,8 +2390,13 @@ Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, break; } } - return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, 
- Translated); + if (CI->getType()->isVectorTy() && + helper.M.GetShaderModel()->IsSM69Plus()) + return TrivialVectorBinaryOperation(CI, IOP, opcode, helper, pObjHelper, + Translated); + else + return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, + Translated); } Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2305,8 +2413,15 @@ Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, break; } } - return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, - Translated); + + hlsl::OP *hlslOP = &helper.hlslOP; + Type *Ty = CI->getType(); + Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); + Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); + Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); + IRBuilder<> Builder(CI); + + return TrivialDxilTrinaryOperationRet(opcode, src0, src1, src2, Ty, hlslOP, Builder); } Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2428,18 +2543,22 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, - hlsl::OP *hlslOP, IRBuilder<> &Builder, - bool Unsigned = false) { +// Instead of using a DXIL intrinsic, implement a dot product operation using +// multiply and add operations. Used for integer dots and long vectors. +Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, + hlsl::OP *hlslOP, IRBuilder<> &Builder, + bool Unsigned = false) { auto madOpCode = Unsigned ? 
DXIL::OpCode::UMad : DXIL::OpCode::IMad; + if (arg0->getType()->getScalarType()->isFloatingPointTy()) + madOpCode = DXIL::OpCode::FMad; Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); Value *Result = Builder.CreateMul(Elt0, Elt1); - for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { - Elt0 = Builder.CreateExtractElement(arg0, iVecElt); - Elt1 = Builder.CreateExtractElement(arg1, iVecElt); - Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, - Builder); + for (unsigned Elt = 1; Elt < vecSize; ++Elt) { + Elt0 = Builder.CreateExtractElement(arg0, Elt); + Elt1 = Builder.CreateExtractElement(arg1, Elt); + Result = TrivialDxilTrinaryOperationRet(madOpCode, Elt0, Elt1, Result, Elt0->getType(), hlslOP, + Builder); } return Result; @@ -2477,10 +2596,10 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, unsigned vecSize = Ty->getVectorNumElements(); Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); IRBuilder<> Builder(CI); - if (Ty->getScalarType()->isFloatingPointTy()) { + if (Ty->getScalarType()->isFloatingPointTy() && Ty->getVectorNumElements() <= 4) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, IOP == IntrinsicOp::IOP_udot); } } @@ -2664,8 +2783,8 @@ Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3); // Msad on vecref and byteSrc. 
- return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum, - hlslOP, Builder); + return TrivialDxilTrinaryOperationRet(DXIL::OpCode::Msad, vecRef, byteSrc, accum, + vecRef->getType(), hlslOP, Builder); } Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -2988,9 +3107,10 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, static Value *ScalarizeElements(Type *RetTy, ArrayRef Elts, IRBuilder<> &Builder) { // Extract value part. - Value *retVal = llvm::UndefValue::get(RetTy); + Value *retVal = nullptr; if (RetTy->isVectorTy()) { unsigned vecSize = RetTy->getVectorNumElements(); + retVal = UndefValue::get(VectorType::get(Elts[0]->getType(), vecSize)); DXASSERT(vecSize <= Elts.size(), "vector size mismatch"); for (unsigned i = 0; i < vecSize; i++) { Value *retComp = Elts[i]; @@ -3046,7 +3166,7 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, if (arg0Ty->getScalarType()->isFloatingPointTy()) { return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, + return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, IOP == IntrinsicOp::IOP_umul); } } else { @@ -3941,14 +4061,40 @@ TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, } // Load/Store intrinsics. +OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) { + switch (RK) { + case DxilResource::Kind::RawBuffer: + case DxilResource::Kind::StructuredBuffer: + return OP::OpCode::RawBufferLoad; + case DxilResource::Kind::TypedBuffer: + return OP::OpCode::BufferLoad; + case DxilResource::Kind::Invalid: + DXASSERT(0, "invalid resource kind"); + break; + default: + return OP::OpCode::TextureLoad; + } + return OP::OpCode::TextureLoad; +} + struct ResLoadHelper { + // Default constructor uses CI load intrinsic call + // to get the retval and various location indicators. 
ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, - Value *h, IntrinsicOp IOP, bool bForSubscript = false); - // For double subscript. - ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip) - : opcode(OP::OpCode::TextureLoad), - intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst), - addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {} + Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr); + // Alternative constructor explicitly sets the index. + // Used for some subscript operators. + ResLoadHelper(Instruction *ldInst, DxilResource::Kind RK, Value *h, + Value *idx, Value *Offset, Value *mip = nullptr) + : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst), + addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + opcode = LoadOpFromResKind(RK); + if (opcode == OP::OpCode::RawBufferLoad && + ldInst->getType()->isVectorTy() && + ldInst->getType()->getVectorNumElements() > 1 && + ldInst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; + } OP::OpCode opcode; IntrinsicOp intrinsicOpCode; unsigned dxilMajor; @@ -3961,26 +4107,23 @@ struct ResLoadHelper { Value *mipLevel; }; +// Uses CI arguments to determine the index, offset, and mipLevel also depending +// on the RK/RC resource kind and class, which determine the opcode. +// Handle and IOP are set explicitly. +// For typed buffer loads, the call instruction feeds into a load +// represented by TyBufSubLoad which determines the instruction to replace. +// Otherwise, CI is replaced. 
ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, Value *hdl, - IntrinsicOp IOP, bool bForSubscript) + IntrinsicOp IOP, LoadInst *TyBufSubLoad) : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) { - switch (RK) { - case DxilResource::Kind::RawBuffer: - case DxilResource::Kind::StructuredBuffer: - opcode = OP::OpCode::RawBufferLoad; - break; - case DxilResource::Kind::TypedBuffer: - opcode = OP::OpCode::BufferLoad; - break; - case DxilResource::Kind::Invalid: - DXASSERT(0, "invalid resource kind"); - break; - default: - opcode = OP::OpCode::TextureLoad; - break; - } - retVal = CI; + opcode = LoadOpFromResKind(RK); + bool bForSubscript = false; + if (TyBufSubLoad) { + bForSubscript = true; + retVal = TyBufSubLoad; + } else + retVal = CI; const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx; addr = CI->getArgOperand(kAddrIdx); unsigned argc = CI->getNumArgOperands(); @@ -4013,8 +4156,9 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, RK == DxilResource::Kind::Texture2DMSArray) { offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx; statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; - mipLevel = - CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); + if (!bForSubscript) + mipLevel = + CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); } if (argc > offsetIdx) @@ -4026,7 +4170,12 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, (RK == DxilResource::Kind::Texture2DMS || RK == DxilResource::Kind::Texture2DMSArray)) { unsigned statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; - mipLevel = CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); + + if (!bForSubscript) + mipLevel = + CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); + else + mipLevel = IRBuilder<>(CI).getInt32(0); if (argc > statusIdx) status = CI->getArgOperand(statusIdx); @@ -4038,9 +4187,19 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, 
DxilResource::Kind RK, status = CI->getArgOperand(kStatusIdx); } } else { + if (opcode == OP::OpCode::RawBufferLoad && + CI->getType()->isVectorTy() && + CI->getType()->getVectorNumElements() > 1 && + CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) + opcode = OP::OpCode::RawBufferVectorLoad; const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; if (argc > kStatusIdx) status = CI->getArgOperand(kStatusIdx); + Type *i32Ty = IRBuilder<>(CI).getInt32Ty(); + if (DXIL::IsStructuredBuffer(RK)) + offset = ConstantInt::get(i32Ty, 0U); + else + offset = UndefValue::get(i32Ty); } } @@ -4048,35 +4207,6 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL); -// Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi } -void Make64bitResultForLoad(Type *EltTy, ArrayRef resultElts32, - unsigned size, MutableArrayRef resultElts, - hlsl::OP *hlslOP, IRBuilder<> &Builder) { - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - if (EltTy == doubleTy) { - Function *makeDouble = - hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy); - Value *makeDoubleOpArg = - Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); - resultElts[i] = V; - } - } else { - for (unsigned i = 0; i < size; i++) { - Value *lo = resultElts32[2 * i]; - Value *hi = resultElts32[2 * i + 1]; - lo = Builder.CreateZExt(lo, i64Ty); - hi = Builder.CreateZExt(hi, i64Ty); - hi = Builder.CreateShl(hi, 32); - resultElts[i] = Builder.CreateOr(lo, hi); - } - } -} - static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) { unsigned mask = 0; @@ -4108,183 +4238,165 @@ Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, IRBuilder<> &Builder, unsigned NumComponents, 
Constant *alignment); -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned VecElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy = false); - -void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK, - IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) { - - Type *Ty = helper.retVal->getType(); - if (Ty->isPointerTy()) { - DXASSERT(!DxilResource::IsAnyTexture(RK), - "Textures should not be treated as structured buffers."); - TranslateStructBufSubscript(cast(helper.retVal), helper.handle, - helper.status, OP, RK, DL); - return; - } +static Value *GenerateBufLd(hlsl::OP *OP, IRBuilder<> &Builder, + OP::OpCode opcode, Type *RegTy, Type *MemTy, + unsigned NumElements, ArrayRef Args, + std::vector::iterator EltIt); +// Sets up arguments for buffer load call. +static SmallVector GetBufLoadArgs(ResLoadHelper helper, + HLResource::Kind RK, + IRBuilder<> Builder, Type *EltTy, + unsigned LdSize) { OP::OpCode opcode = helper.opcode; + llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); - Type *i32Ty = Builder.getInt32Ty(); - Type *i64Ty = Builder.getInt64Ty(); - Type *doubleTy = Builder.getDoubleTy(); - Type *EltTy = Ty->getScalarType(); - unsigned numComponents = 1; - if (Ty->isVectorTy()) { - numComponents = Ty->getVectorNumElements(); - } - - if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) { - std::vector bufLds; - const bool isBool = EltTy->isIntegerTy(1); + unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U : 8U; + alignment = std::min(alignment, LdSize); + Constant *alignmentVal = Builder.getInt32(alignment); - // Bool are represented as i32 in memory - Type *MemReprTy = isBool ? 
Builder.getInt32Ty() : EltTy; - bool isScalarTy = !Ty->isVectorTy(); - - Value *retValNew = nullptr; - if (DXIL::IsStructuredBuffer(RK)) { - retValNew = TranslateRawBufVecLd( - MemReprTy, numComponents, Builder, helper.handle, OP, helper.status, - helper.addr, OP->GetU32Const(0), DL, bufLds, - /*baseAlign (in bytes)*/ 8, isScalarTy); - } else { - retValNew = - TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, - OP, helper.status, nullptr, helper.addr, DL, - bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy); - } - - DXASSERT_NOMSG(!bufLds.empty()); - dxilutil::MigrateDebugValue(helper.retVal, bufLds.front()); - - if (isBool) { - // Convert result back to register representation. - retValNew = Builder.CreateICmpNE( - retValNew, Constant::getNullValue(retValNew->getType())); - } - - helper.retVal->replaceAllUsesWith(retValNew); - helper.retVal = retValNew; - return; - } - - bool isTyped = opcode == OP::OpCode::TextureLoad || - RK == DxilResource::Kind::TypedBuffer; - bool is64 = EltTy == i64Ty || EltTy == doubleTy; - if (is64 && isTyped) { - EltTy = i32Ty; - } - bool isBool = EltTy->isIntegerTy(1); - if (isBool) { - // Value will be loaded in its memory representation. - EltTy = i32Ty; - if (Ty->isVectorTy()) - Ty = VectorType::get(EltTy, numComponents); - } - - Function *F = OP->GetOpFunc(opcode, EltTy); - llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); - - llvm::Value *undefI = llvm::UndefValue::get(i32Ty); - - SmallVector loadArgs; - loadArgs.emplace_back(opArg); // opcode - loadArgs.emplace_back(helper.handle); // resource handle + // Assemble args is specific to the type bab/struct/typed + // Typed needs to handle the possibility of vector coords + // Raws need to calculate alignment and mask values. + SmallVector Args; + Args.emplace_back(opArg); // opcode @0 + Args.emplace_back(helper.handle); // resource handle @1 + // Set offsets appropriate for the load operation. 
+ bool isVectorAddr = helper.addr->getType()->isVectorTy(); if (opcode == OP::OpCode::TextureLoad) { - // set mip level - loadArgs.emplace_back(helper.mipLevel); - } + llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty()); - if (opcode == OP::OpCode::TextureLoad) { - // texture coord + // Set mip level or sample for MS textures @2. + Args.emplace_back(helper.mipLevel); + // Set texture coords according to resource kind @3-5 + // Coords unused by the resource kind are undefs. unsigned coordSize = DxilResource::GetNumCoords(RK); - bool isVectorAddr = helper.addr->getType()->isVectorTy(); for (unsigned i = 0; i < 3; i++) { if (i < coordSize) { - loadArgs.emplace_back(isVectorAddr - ? Builder.CreateExtractElement(helper.addr, i) - : helper.addr); + Args.emplace_back(isVectorAddr + ? Builder.CreateExtractElement(helper.addr, i) + : helper.addr); } else - loadArgs.emplace_back(undefI); + Args.emplace_back(undefI); + } + // Set texture offsets according to resource kind @6-8 + // Offsets unused by the resource kind are undefs. unsigned offsetSize = DxilResource::GetNumOffsets(RK); + if (!helper.offset || isa(helper.offset)) + offsetSize = 0; + for (unsigned i = 0; i < 3; i++) { + if (i < offsetSize) + Args.emplace_back(Builder.CreateExtractElement(helper.offset, i)); + else + Args.emplace_back(undefI); } } else { - if (helper.addr->getType()->isVectorTy()) { - Value *scalarOffset = - Builder.CreateExtractElement(helper.addr, (uint64_t)0); + // coord (may be changed later) @2 + Args.emplace_back( + isVectorAddr ? Builder.CreateExtractElement(helper.addr, (uint64_t)0) + : helper.addr); + Args.emplace_back(helper.offset); // offset (may be changed later) @3 + + if (opcode == OP::OpCode::RawBufferLoad) { + // Unlike typed buffer load, raw buffer load has mask and alignment. 
+ Args.emplace_back(nullptr); // mask (to be added later) @4 + Args.emplace_back(alignmentVal); // alignment @5 + } else if (opcode == OP::OpCode::RawBufferVectorLoad) { + // RawBufferVectorLoad takes no mask argument. + Args.emplace_back(alignmentVal); // alignment @4 + } - // TODO: calculate the real address based on opcode + } + return Args; +} - loadArgs.emplace_back(scalarOffset); // offset - } else { - // TODO: calculate the real address based on opcode +// Emits as many calls as needed to load the full vector +// Performs any needed extractions and conversions of the results. +Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, + IRBuilder<> &Builder, hlsl::OP *OP, + const DataLayout &DL) { + OP::OpCode opcode = helper.opcode; + Type *Ty = helper.retVal->getType(); + unsigned numComponents = 1; + if (Ty->isVectorTy()) + numComponents = Ty->getVectorNumElements(); - loadArgs.emplace_back(helper.addr); // offset - } - } - // offset 0 - if (opcode == OP::OpCode::TextureLoad) { - if (helper.offset && !isa(helper.offset)) { - unsigned offsetSize = DxilResource::GetNumOffsets(RK); - for (unsigned i = 0; i < 3; i++) { - if (i < offsetSize) - loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i)); - else - loadArgs.emplace_back(undefI); - } - } else { - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); - loadArgs.emplace_back(undefI); - } - } + const bool isTyped = DXIL::IsTyped(RK); + Type *EltTy = Ty->getScalarType(); + const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()); + const bool isBool = EltTy->isIntegerTy(1); + Type *MemTy = EltTy; + if (isBool || (is64 && isTyped)) + // Value will be loaded in its memory representation. 
+ MemTy = Builder.getInt32Ty(); + unsigned LdSize = DL.getTypeAllocSize(MemTy); - // Offset 1 - if (RK == DxilResource::Kind::TypedBuffer) { - loadArgs.emplace_back(undefI); - } + std::vector elts(numComponents); + + SmallVector Args = + GetBufLoadArgs(helper, RK, Builder, MemTy, LdSize); - Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode)); - dxilutil::MigrateDebugValue(helper.retVal, ResRet); + const unsigned kCoordIdx = 2; + const unsigned kOffsetIdx = 3; + const unsigned kMaskIdx = 4; + // Keep track of the first load for debug info migration. + Value *FirstLd = nullptr; Value *retValNew = nullptr; - if (!is64 || !isTyped) { - retValNew = ScalarizeResRet(Ty, ResRet, Builder); + if (opcode == OP::OpCode::RawBufferVectorLoad) { + FirstLd = GenerateBufLd(OP, Builder, opcode, Ty->getScalarType(), + VectorType::get(MemTy, numComponents), 1, Args, + elts.begin()); + UpdateStatus(FirstLd, helper.status, Builder, OP); + retValNew = elts[0]; } else { - unsigned size = numComponents; - DXASSERT(size <= 2, "typed buffer only allow 4 dwords"); - EltTy = Ty->getScalarType(); - Value *Elts[2]; - - Make64bitResultForLoad(Ty->getScalarType(), - { - Builder.CreateExtractValue(ResRet, 0), - Builder.CreateExtractValue(ResRet, 1), - Builder.CreateExtractValue(ResRet, 2), - Builder.CreateExtractValue(ResRet, 3), - }, - size, Elts, OP, Builder); - - retValNew = ScalarizeElements(Ty, Elts, Builder); + // Create calls to function object. + // Typed buffer loads are limited to one load of up to 4 32-bit values. + // Raw buffer loads might need multiple loads in chunks of 4. + for (unsigned i = 0; i < numComponents;) { + unsigned chunkSize = (numComponents - i) <= 4 ? numComponents - i : 4; + + // Assign mask for raw buffer loads. 
+ if (opcode == OP::OpCode::RawBufferLoad) + Args[kMaskIdx] = GetRawBufferMaskForETy(MemTy, chunkSize, OP); + + Value *Ld = GenerateBufLd(OP, Builder, opcode, Ty->getScalarType(), MemTy, + chunkSize, Args, elts.begin() + i); + i += chunkSize; + + // Update status. + UpdateStatus(Ld, helper.status, Builder, OP); + + if (!FirstLd) + FirstLd = Ld; + + if (opcode == OP::OpCode::RawBufferLoad && i < numComponents) { + if (RK == DxilResource::Kind::RawBuffer) + // Raw buffers can't use offset param. Add to coord index. + Args[kCoordIdx] = + Builder.CreateAdd(Args[kCoordIdx], OP->GetU32Const(4 * LdSize)); + else + // Structured buffers increment the offset parameter. + Args[kOffsetIdx] = + Builder.CreateAdd(Args[kOffsetIdx], OP->GetU32Const(4 * LdSize)); + } + } + retValNew = ScalarizeElements(Ty, elts, Builder); } + DXASSERT(FirstLd, "No loads created by TranslateBufLoad"); + if (isBool) { // Convert result back to register representation. retValNew = Builder.CreateICmpNE( retValNew, Constant::getNullValue(retValNew->getType())); } - // replace helper.retVal->replaceAllUsesWith(retValNew); - // Save new ret val. 
helper.retVal = retValNew; - // get status - UpdateStatus(ResRet, helper.status, Builder, OP); + + return FirstLd; } Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -4292,6 +4404,7 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; + DataLayout &DL = helper.dataLayout; Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); IRBuilder<> Builder(CI); @@ -4299,9 +4412,19 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, DXIL::ResourceClass RC = pObjHelper->GetRC(handle); DXIL::ResourceKind RK = pObjHelper->GetRK(handle); - ResLoadHelper loadHelper(CI, RK, RC, handle, IOP); - TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout); - // CI is replaced in TranslateLoad. + ResLoadHelper ldHelper(CI, RK, RC, handle, IOP); + Type *Ty = CI->getType(); + Value *Ld = nullptr; + if (Ty->isPointerTy()) { + DXASSERT(!DxilResource::IsAnyTexture(RK), + "Textures should not be treated as structured buffers."); + TranslateStructBufSubscript(cast(ldHelper.retVal), handle, + ldHelper.status, hlslOP, RK, DL); + } else { + Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); + dxilutil::MigrateDebugValue(CI, Ld); + } + // CI is replaced by above translation calls.. return nullptr; } @@ -4345,19 +4468,20 @@ void Split64bitValForStore(Type *EltTy, ArrayRef vals, unsigned size, } void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, - Value *offset, IRBuilder<> &Builder, hlsl::OP *OP, - Value *sampIdx = nullptr) { + Value *Idx, Value *offset, IRBuilder<> &Builder, + hlsl::OP *OP, Value *sampIdx = nullptr) { Type *Ty = val->getType(); - - // This function is no longer used for lowering stores to a - // structured buffer. 
- DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer); - OP::OpCode opcode = OP::OpCode::NumOpCodes; + bool isTyped = true; switch (RK) { case DxilResource::Kind::RawBuffer: case DxilResource::Kind::StructuredBuffer: + isTyped = false; opcode = OP::OpCode::RawBufferStore; + // Where shader model and type allows, use vector store intrinsic. + if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && + Ty->isVectorTy() && Ty->getVectorNumElements() > 1) + opcode = OP::OpCode::RawBufferVectorStore; break; case DxilResource::Kind::TypedBuffer: opcode = OP::OpCode::BufferStore; @@ -4374,10 +4498,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, break; } - bool isTyped = opcode == OP::OpCode::TextureStore || - opcode == OP::OpCode::TextureStoreSample || - RK == DxilResource::Kind::TypedBuffer; - Type *i32Ty = Builder.getInt32Ty(); Type *i64Ty = Builder.getInt64Ty(); Type *doubleTy = Builder.getDoubleTy(); @@ -4404,7 +4524,6 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, EltTy = i32Ty; } - Function *F = OP->GetOpFunc(opcode, EltTy); llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); llvm::Value *undefI = @@ -4416,44 +4535,58 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, storeArgs.emplace_back(opArg); // opcode storeArgs.emplace_back(handle); // resource handle - unsigned offset0Idx = 0; - if (RK == DxilResource::Kind::RawBuffer || - RK == DxilResource::Kind::TypedBuffer) { - // Offset 0 - if (offset->getType()->isVectorTy()) { - Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0); - storeArgs.emplace_back(scalarOffset); // offset + unsigned OffsetIdx = 0; + if (opcode == OP::OpCode::RawBufferStore || + opcode == OP::OpCode::RawBufferVectorStore || + opcode == OP::OpCode::BufferStore) { + // Append Coord0 (Index) value. 
+ if (Idx->getType()->isVectorTy()) { + Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0); + storeArgs.emplace_back(ScalarIdx); // Coord0 (Index). } else { - storeArgs.emplace_back(offset); // offset + storeArgs.emplace_back(Idx); // Coord0 (Index). } - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + // Store OffsetIdx representing the argument that may need to be incremented + // later to store additional chunks of data. + // Only structured buffers can use the offset parameter. + // Others must increment the index. + if (RK == DxilResource::Kind::StructuredBuffer) + OffsetIdx = storeArgs.size(); + else + OffsetIdx = storeArgs.size() - 1; - // Offset 1 - storeArgs.emplace_back(undefI); + // Coord1 (Offset). + storeArgs.emplace_back(offset); } else { // texture store unsigned coordSize = DxilResource::GetNumCoords(RK); // Set x first. - if (offset->getType()->isVectorTy()) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0)); + if (Idx->getType()->isVectorTy()) + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0)); else - storeArgs.emplace_back(offset); - - // Store offset0 for later use - offset0Idx = storeArgs.size() - 1; + storeArgs.emplace_back(Idx); for (unsigned i = 1; i < 3; i++) { if (i < coordSize) - storeArgs.emplace_back(Builder.CreateExtractElement(offset, i)); + storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i)); else storeArgs.emplace_back(undefI); } // TODO: support mip for texture ST } + // RawBufferVectorStore only takes a single value and alignment arguments. + if (opcode == DXIL::OpCode::RawBufferVectorStore) { + storeArgs.emplace_back(val); + storeArgs.emplace_back(Alignment); + Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); + Builder.CreateCall(F, storeArgs); + return; + } + Function *F = OP->GetOpFunc(opcode, EltTy); + constexpr unsigned MaxStoreElemCount = 4; const unsigned CompCount = Ty->isVectorTy() ? 
Ty->getVectorNumElements() : 1; const unsigned StoreInstCount = @@ -4474,23 +4607,17 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, } for (unsigned j = 0; j < storeArgsList.size(); j++) { - - // For second and subsequent store calls, increment the offset0 (i.e. store - // index) + // For second and subsequent store calls, increment the resource-appropriate + // index or offset parameter. if (j > 0) { - // Greater than four-components store is not allowed for - // TypedBuffer and Textures. So greater than four elements - // scenario should only get hit here for RawBuffer. - DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer); unsigned EltSize = OP->GetAllocSizeForType(EltTy); - unsigned newOffset = EltSize * MaxStoreElemCount * j; - Value *newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset); - newOffsetVal = - Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal); - storeArgsList[j][offset0Idx] = newOffsetVal; + unsigned NewCoord = EltSize * MaxStoreElemCount * j; + Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord); + NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal); + storeArgsList[j][OffsetIdx] = NewCoordVal; } - // values + // Set value parameters. 
uint8_t mask = 0; if (Ty->isVectorTy()) { unsigned vecSize = @@ -4586,7 +4713,8 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); - TranslateStore(RK, handle, val, offset, Builder, hlslOP); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); + TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); return nullptr; } @@ -6091,20 +6219,8 @@ Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateAnd(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return Result; - } return Builder.CreateAnd(x, y); } Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6112,20 +6228,8 @@ Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); - Type *Ty = CI->getType(); - Type *EltTy = Ty->getScalarType(); IRBuilder<> Builder(CI); - if (Ty != EltTy) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) { - Value *EltX = Builder.CreateExtractElement(x, i); - Value *EltY = Builder.CreateExtractElement(y, i); - Value *tmp = Builder.CreateOr(EltX, EltY); - Result = Builder.CreateInsertElement(Result, tmp, i); - } - return 
Result; - } return Builder.CreateOr(x, y); } Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, @@ -6166,7 +6270,6 @@ Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, } // SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, HLOperationLowerHelper &helper, @@ -6176,7 +6279,6 @@ Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic."); return nullptr; } -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, @@ -6403,7 +6505,7 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble}, {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan}, + {IntrinsicOp::IOP_atan, TrivialVectorizableUnaryOperation, DXIL::OpCode::Atan}, {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi}, {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes}, @@ -6446,7 +6548,7 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, DXIL::OpCode::FirstbitLo}, {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni}, - {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma}, + {IntrinsicOp::IOP_fma, TranslateFMA, DXIL::OpCode::Fma}, {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc}, {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes}, @@ -6494,7 +6596,7 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt}, 
{IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan}, - {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan}, + {IntrinsicOp::IOP_tanh, TrivialVectorizableUnaryOperation, DXIL::OpCode::Htan}, {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, @@ -6521,7 +6623,6 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, -#ifdef ENABLE_SPIRV_CODEGEN {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic, @@ -6532,7 +6633,6 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream}, {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream}, {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, @@ -6760,11 +6860,9 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, DXIL::OpCode::OutputComplete}, -// SPIRV change starts -#ifdef ENABLE_SPIRV_CODEGEN + // SPIRV change starts {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes}, -#endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends // Manually added part. @@ -7830,6 +7928,62 @@ void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP, // Structured buffer. 
namespace { +// Returns load call return value +// and the elements extracted from it in `Elts` +static Value *GenerateBufLd(hlsl::OP *OP, IRBuilder<> &Builder, + OP::OpCode opcode, Type *RegTy, Type *MemTy, + unsigned NumElements, ArrayRef Args, + std::vector::iterator EltIt) { + const bool isTyped = + (opcode == OP::OpCode::BufferLoad || opcode == OP::OpCode::TextureLoad); + // Value will be loaded in its memory representation. + Function *F = OP->GetOpFunc(opcode, MemTy); + Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); + + // Extract values and convet to register type if needed. + // 64-bit types need to have the two i32 merged into their result + // bools need to use cmp to convert them to i1s. + + if (isTyped && RegTy->isDoubleTy()) { + DXASSERT(NumElements <= 2, "typed buffers only allow 4 dwords"); + Type *doubleTy = Builder.getDoubleTy(); + Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy); + Value *makeDoubleOpArg = + Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); + for (unsigned i = 0; i < NumElements; i++, EltIt++) { + Value *lo = Builder.CreateExtractValue(Ld, 2 * i); + Value *hi = Builder.CreateExtractValue(Ld, 2 * i + 1); + Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); + *EltIt = V; + } + return Ld; + } + + if (isTyped && RegTy->isIntegerTy(64)) { + DXASSERT(NumElements <= 2, "typed buffers only allow 4 dwords"); + Type *i64Ty = Builder.getInt64Ty(); + for (unsigned i = 0; i < NumElements; i++, EltIt++) { + Value *lo = Builder.CreateExtractValue(Ld, 2 * i); + Value *hi = Builder.CreateExtractValue(Ld, 2 * i + 1); + lo = Builder.CreateZExt(lo, i64Ty); + hi = Builder.CreateZExt(hi, i64Ty); + hi = Builder.CreateShl(hi, 32); + *EltIt = Builder.CreateOr(lo, hi); + } + return Ld; + } + + if (opcode == OP::OpCode::RawBufferVectorLoad) { + DXASSERT(RegTy != Builder.getInt64Ty() && RegTy != Builder.getDoubleTy(), + "64-bit type conversions for longvecs requires op support"); + *EltIt = 
Builder.CreateExtractValue(Ld, 0); + } else + for (unsigned i = 0; i < NumElements; i++, EltIt++) + *EltIt = Builder.CreateExtractValue(Ld, i); + + return Ld; +} + Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, Value *status, Type *EltTy, MutableArrayRef resultElts, hlsl::OP *OP, @@ -7887,113 +8041,38 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset, Builder.CreateCall(dxilF, Args); } -static Value *TranslateRawBufVecLd(Type *VecEltTy, unsigned ElemCount, - IRBuilder<> &Builder, Value *handle, - hlsl::OP *OP, Value *status, Value *bufIdx, - Value *baseOffset, const DataLayout &DL, - std::vector &bufLds, - unsigned baseAlign, bool isScalarTy) { - - unsigned EltSize = DL.getTypeAllocSize(VecEltTy); - unsigned alignment = std::min(baseAlign, EltSize); - Constant *alignmentVal = OP->GetI32Const(alignment); - - if (baseOffset == nullptr) { - baseOffset = OP->GetU32Const(0); - } - - std::vector elts(ElemCount); - unsigned rest = (ElemCount % 4); - for (unsigned i = 0; i < ElemCount - rest; i += 4) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, 4, alignmentVal); - bufLds.emplace_back(bufLd); - elts[i] = ResultElts[0]; - elts[i + 1] = ResultElts[1]; - elts[i + 2] = ResultElts[2]; - elts[i + 3] = ResultElts[3]; - - baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize)); - } - - if (rest) { - Value *ResultElts[4]; - Value *bufLd = - GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, - ResultElts, OP, Builder, rest, alignmentVal); - bufLds.emplace_back(bufLd); - for (unsigned i = 0; i < rest; i++) - elts[ElemCount - rest + i] = ResultElts[i]; - } - - // If the expected return type is scalar then skip building a vector - if (isScalarTy) { - return elts[0]; - } - - Value *Vec = HLMatrixLower::BuildVector(VecEltTy, elts, Builder); - return Vec; -} - -Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder, - 
Value *handle, hlsl::OP *OP, Value *status, - Value *bufIdx, Value *baseOffset, +Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, + Value *handle, HLResource::Kind RK, hlsl::OP *OP, + Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { + + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); +#ifndef NDEBUG + Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); + Type *matType = ptr->getType()->getPointerElementType(); HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - unsigned matSize = MatTy.getNumElements(); - std::vector bufLds; - Value *Vec = - TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx, - baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8); - Vec = MatTy.emitLoweredMemToReg(Vec, Builder); - return Vec; + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == + helper.retVal->getType(), + "helper type should match vectorized matrix"); +#endif + return TranslateBufLoad(helper, RK, Builder, OP, DL); } void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, hlsl::OP *OP, Value *bufIdx, Value *baseOffset, Value *val, const DataLayout &DL) { +#ifndef NDEBUG HLMatrixType MatTy = HLMatrixType::cast(matType); - Type *EltTy = MatTy.getElementTypeForMem(); - - val = MatTy.emitLoweredRegToMem(val, Builder); - - unsigned EltSize = DL.getTypeAllocSize(EltTy); - Constant *Alignment = OP->GetI32Const(EltSize); - Value *offset = baseOffset; - if (baseOffset == nullptr) - offset = OP->GetU32Const(0); - - unsigned matSize = MatTy.getNumElements(); - Value *undefElt = UndefValue::get(EltTy); - - unsigned storeSize = matSize; - if (matSize % 4) { - storeSize = matSize + 4 - (matSize & 3); - } - std::vector elts(storeSize, undefElt); - for (unsigned i = 0; i < matSize; i++) - elts[i] = Builder.CreateExtractElement(val, i); - - for (unsigned i = 0; i < matSize; i += 4) { - uint8_t mask = 0; - for (unsigned j = 0; j < 4 && 
(i + j) < matSize; j++) { - if (elts[i + j] != undefElt) - mask |= (1 << j); - } - GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder, - {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask, - Alignment); - // Update offset by 4*4bytes. - offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize)); - } + DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(), + "helper type should match vectorized matrix"); +#endif + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); } -void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, - Value *status, Value *bufIdx, Value *baseOffset, - const DataLayout &DL) { +void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, + hlsl::OP *OP, Value *status, Value *bufIdx, + Value *baseOffset, const DataLayout &DL) { IRBuilder<> Builder(CI); HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); unsigned opcode = GetHLOpcode(CI); @@ -8006,13 +8085,10 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP, // orientation. switch (matOp) { case HLMatLoadStoreOpcode::RowMatLoad: - case HLMatLoadStoreOpcode::ColMatLoad: { - Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); - Value *NewLd = TranslateStructBufMatLd( - ptr->getType()->getPointerElementType(), Builder, handle, OP, status, - bufIdx, baseOffset, DL); - CI->replaceAllUsesWith(NewLd); - } break; + case HLMatLoadStoreOpcode::ColMatLoad: + TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx, + baseOffset, DL); + break; case HLMatLoadStoreOpcode::RowMatStore: case HLMatLoadStoreOpcode::ColMatStore: { Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); @@ -8136,6 +8212,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, GEP->eraseFromParent(); } else if (StoreInst *stUser = dyn_cast(subsUser)) { + // Store elements of matrix in a struct. 
Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. IRBuilder<> stBuilder(stUser); Value *Val = stUser->getValueOperand(); if (Val->getType()->isVectorTy()) { @@ -8159,6 +8238,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, LoadInst *ldUser = cast(subsUser); IRBuilder<> ldBuilder(ldUser); Value *ldData = UndefValue::get(resultType); + // Load elements of matrix in a struct. Needs to be done one scalar at a + // time even for vectors in the case that matrix orientation spreads the + // indexed scalars throughout the matrix vector. if (resultType->isVectorTy()) { for (unsigned i = 0; i < resultSize; i++) { Value *ResultElt; @@ -8283,57 +8365,26 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } userCall->eraseFromParent(); } else if (group == HLOpcodeGroup::HLMatLoadStore) - TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx, + // Load/Store matrix within a struct + TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx, baseOffset, DL); else if (group == HLOpcodeGroup::HLSubscript) { + // Subscript of matrix within a struct TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx, baseOffset, status, OP, DL); } - } else if (isa(user) || isa(user)) { - LoadInst *LdInst = dyn_cast(user); - StoreInst *StInst = dyn_cast(user); - - Type *Ty = isa(user) ? 
LdInst->getType() - : StInst->getValueOperand()->getType(); - Type *pOverloadTy = Ty->getScalarType(); - Value *Offset = baseOffset; - - if (LdInst) { - unsigned NumComponents = 0; - if (VectorType *VTy = dyn_cast(Ty)) - NumComponents = VTy->getNumElements(); - else - NumComponents = 1; - Value *ResultElts[4]; - Constant *Alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateRawBufLd(handle, bufIdx, Offset, status, pOverloadTy, ResultElts, - OP, Builder, NumComponents, Alignment); - Value *NewLd = ScalarizeElements(Ty, ResultElts, Builder); - LdInst->replaceAllUsesWith(NewLd); - } else { - Value *val = StInst->getValueOperand(); - Value *undefVal = llvm::UndefValue::get(pOverloadTy); - Value *vals[] = {undefVal, undefVal, undefVal, undefVal}; - uint8_t mask = 0; - if (Ty->isVectorTy()) { - unsigned vectorNumElements = Ty->getVectorNumElements(); - DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector"); - assert(vectorNumElements <= 4); - for (unsigned i = 0; i < vectorNumElements; i++) { - vals[i] = Builder.CreateExtractElement(val, i); - mask |= (1 << i); - } - } else { - vals[0] = val; - mask = DXIL::kCompMask_X; - } - Constant *alignment = - OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType())); - GenerateStructBufSt(handle, bufIdx, Offset, pOverloadTy, OP, Builder, - vals, mask, alignment); - } - user->eraseFromParent(); + } else if (LoadInst *LdInst = dyn_cast(user)) { + // Load of scalar/vector within a struct or structured raw load. + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + TranslateBufLoad(helper, ResKind, Builder, OP, DL); + + LdInst->eraseFromParent(); + } else if (StoreInst *StInst = dyn_cast(user)) { + // Store of scalar/vector within a struct or structured raw store. 
+ Value *val = StInst->getValueOperand(); + TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, + baseOffset, Builder, OP); + StInst->eraseFromParent(); } else if (BitCastInst *BCI = dyn_cast(user)) { // Recurse users for (auto U = BCI->user_begin(); U != BCI->user_end();) { @@ -8368,13 +8419,18 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()), "else bitness is wrong"); - offset = Builder.CreateAdd(offset, baseOffset); + // Raw buffers can't have defined offsets, apply to index. + if (DXIL::IsRawBuffer(ResKind)) + bufIdx = Builder.CreateAdd(offset, bufIdx); + else + baseOffset = Builder.CreateAdd(offset, baseOffset); for (auto U = GEP->user_begin(); U != GEP->user_end();) { Value *GEPUser = *(U++); TranslateStructBufSubscriptUser(cast(GEPUser), handle, - ResKind, bufIdx, offset, status, OP, DL); + ResKind, bufIdx, baseOffset, status, OP, + DL); } // delete the inst GEP->eraseFromParent(); @@ -8388,13 +8444,12 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); Value *bufIdx = nullptr; Value *offset = nullptr; - if (ResKind == HLResource::Kind::RawBuffer) { - offset = subscriptIndex; - } else { + bufIdx = subscriptIndex; + if (ResKind == HLResource::Kind::RawBuffer) + offset = UndefValue::get(Type::getInt32Ty(CI->getContext())); + else // StructuredBuffer, TypedBuffer, etc. - bufIdx = subscriptIndex; offset = OP->GetU32Const(0); - } for (auto U = CI->user_begin(); U != CI->user_end();) { Value *user = *(U++); @@ -8408,19 +8463,14 @@ void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, // HLSubscript. 
namespace { -Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK, +Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK, DXIL::ResourceClass RC, Value *handle, LoadInst *ldInst, IRBuilder<> &Builder, hlsl::OP *hlslOP, const DataLayout &DL) { - ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, - /*bForSubscript*/ true); - // Default sampleIdx for 2DMS textures. - if (RK == DxilResource::Kind::Texture2DMS || - RK == DxilResource::Kind::Texture2DMSArray) - ldHelper.mipLevel = hlslOP->GetU32Const(0); - // use ldInst as retVal - ldHelper.retVal = ldInst; - TranslateLoad(ldHelper, RK, Builder, hlslOP, DL); + // The arguments to the call instruction are used to determine the access, + // the return value and type come from the load instruction. + ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst); + TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); // delete the ld ldInst->eraseFromParent(); return ldHelper.retVal; @@ -8463,7 +8513,7 @@ Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx, return VecVal; } -void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, +void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); @@ -8480,14 +8530,15 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, User *user = *(It++); Instruction *I = cast(user); IRBuilder<> Builder(I); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); if (LoadInst *ldInst = dyn_cast(user)) { - TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, + TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout); } else if (StoreInst *stInst = dyn_cast(user)) { Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, - 
CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP); // delete the st stInst->eraseFromParent(); } else if (GetElementPtrInst *GEP = dyn_cast(user)) { @@ -8504,7 +8555,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate Ld. LoadInst *tmpLd = StBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout); // Update vector. ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx, @@ -8512,9 +8563,10 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate St. // Reset insert point, UpdateVectorElt may move SI to different block. StBuilder.SetInsertPoint(SI); - TranslateStore(RK, handle, ldVal, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - StBuilder, hlslOP); + TranslateStore( + RK, handle, ldVal, + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI, + StBuilder, hlslOP); SI->eraseFromParent(); continue; } @@ -8524,7 +8576,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, // Generate tmp vector load with vector type & translate it LoadInst *tmpLd = LdBuilder.CreateLoad(CI); - Value *ldVal = TranslateTypedBufLoad( + Value *ldVal = TranslateTypedBufSubscript( CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout); // get the single element @@ -8697,15 +8749,17 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, DXASSERT(CI->hasOneUse(), "subscript should only have one use"); IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { - ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel); - TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); + Value *Offset = UndefValue::get(Builder.getInt32Ty()); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + 
TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); ldInst->eraseFromParent(); } else { StoreInst *stInst = cast(*U); Value *val = stInst->getValueOperand(); + Value *UndefI = UndefValue::get(Builder.getInt32Ty()); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP, mipLevel); + CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), + UndefI, Builder, hlslOP, mipLevel); stInst->eraseFromParent(); } Translated = true; @@ -8736,7 +8790,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, helper.dataLayout); else - TranslateDefaultSubscript(CI, helper, pObjHelper, Translated); + TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated); return; } diff --git a/lib/Transforms/Scalar/DxilEliminateVector.cpp b/lib/Transforms/Scalar/DxilEliminateVector.cpp index 366f011dae..bb9cf43594 100644 --- a/lib/Transforms/Scalar/DxilEliminateVector.cpp +++ b/lib/Transforms/Scalar/DxilEliminateVector.cpp @@ -10,6 +10,8 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include "dxc/DXIL/DxilModule.h" + #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" @@ -151,6 +153,10 @@ bool DxilEliminateVector::TryRewriteDebugInfoForVector(InsertElementInst *IE) { bool DxilEliminateVector::runOnFunction(Function &F) { + if (F.getParent()->HasDxilModule()) + if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus()) + return false; + auto *DT = &getAnalysis().getDomTree(); DxilValueCache *DVC = &getAnalysis(); diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp index feeb23a5da..6d6b93f951 100644 --- a/lib/Transforms/Scalar/LowerTypePasses.cpp +++ b/lib/Transforms/Scalar/LowerTypePasses.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include 
"dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilUtil.h" #include "dxc/HLSL/HLModule.h" @@ -180,10 +181,12 @@ bool LowerTypePass::runOnModule(Module &M) { namespace { class DynamicIndexingVectorToArray : public LowerTypePass { bool ReplaceAllVectors; + bool SupportsVectors; public: explicit DynamicIndexingVectorToArray(bool ReplaceAll = false) - : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll) {} + : LowerTypePass(ID), ReplaceAllVectors(ReplaceAll), + SupportsVectors(false) {} static char ID; // Pass identification, replacement for typeid void applyOptions(PassOptions O) override; void dumpConfig(raw_ostream &OS) override; @@ -194,6 +197,7 @@ class DynamicIndexingVectorToArray : public LowerTypePass { Type *lowerType(Type *Ty) override; Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override; StringRef getGlobalPrefix() override { return ".v"; } + void initialize(Module &M) override; private: bool HasVectorDynamicIndexing(Value *V); @@ -207,6 +211,11 @@ class DynamicIndexingVectorToArray : public LowerTypePass { void ReplaceAddrSpaceCast(ConstantExpr *CE, Value *A, IRBuilder<> &Builder); }; +void DynamicIndexingVectorToArray::initialize(Module &M) { + if (M.HasHLModule()) + SupportsVectors = M.GetHLModule().GetShaderModel()->IsSM69Plus(); +} + void DynamicIndexingVectorToArray::applyOptions(PassOptions O) { GetPassOptionBool(O, "ReplaceAllVectors", &ReplaceAllVectors, ReplaceAllVectors); @@ -286,7 +295,7 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) { StoreInst *stInst = cast(GEPUser); Value *val = stInst->getValueOperand(); Value *ldVal = Builder.CreateLoad(V); - ldVal = Builder.CreateInsertElement(ldVal, val, constIdx); + ldVal = Builder.CreateInsertElement(ldVal, val, constIdx); // UGH Builder.CreateStore(ldVal, V); stInst->eraseFromParent(); } @@ -306,8 +315,11 @@ void DynamicIndexingVectorToArray::ReplaceStaticIndexingOnVector(Value *V) { } bool 
DynamicIndexingVectorToArray::needToLower(Value *V) { + // Only needed where vectors aren't supported. + if (SupportsVectors) + return false; Type *Ty = V->getType()->getPointerElementType(); - if (dyn_cast(Ty)) { + if (isa(Ty)) { if (isa(V) || ReplaceAllVectors) { return true; } diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 0c3e13f608..6737c9100e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -1869,7 +1869,8 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { // if // all its users can be transformed, then split up the aggregate into its // separate elements. - if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { + if (!HLM.GetShaderModel()->IsSM69Plus() && ShouldAttemptScalarRepl(AI) && + isSafeAllocaToScalarRepl(AI)) { std::vector Elts; IRBuilder<> Builder(dxilutil::FindAllocaInsertionPt(AI)); bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI); @@ -1945,8 +1946,9 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { continue; } - // Flat Global vector if no dynamic vector indexing. - bool bFlatVector = !hasDynamicVectorIndexing(GV); + // Flat Global vector if no dynamic vector indexing and pre-6.9. + bool bFlatVector = + !hasDynamicVectorIndexing(GV) && !HLM.GetShaderModel()->IsSM69Plus(); if (bFlatVector) { GVDbgOffset &dbgOffset = GVDbgOffsetMap[GV]; @@ -1980,10 +1982,12 @@ bool SROAGlobalAndAllocas(HLModule &HLM, bool bHasDbgInfo) { } else { // SROA_Parameter_HLSL has no access to a domtree, if one is needed, // it'll be generated - SROAed = SROA_Helper::DoScalarReplacement( - GV, Elts, Builder, bFlatVector, - // TODO: set precise. - /*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr); + if (!HLM.GetShaderModel()->IsSM69Plus()) { + SROAed = SROA_Helper::DoScalarReplacement( + GV, Elts, Builder, bFlatVector, + // TODO: set precise. 
+ /*hasPrecise*/ false, typeSys, DL, DeadInsts, /*DT*/ nullptr); + } } if (SROAed) { diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 729771c7c7..1b07d5f14f 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,6 +14,8 @@ // //===----------------------------------------------------------------------===// +#include "dxc/DXIL/DxilModule.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -290,6 +292,10 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { + if (F.getParent()->HasDxilModule()) + if (F.getParent()->GetDxilModule().GetShaderModel()->IsSM69Plus()) + return false; + for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { diff --git a/tools/clang/include/clang/AST/DeclCXX.h b/tools/clang/include/clang/AST/DeclCXX.h index 3b07576545..36e0f99c82 100644 --- a/tools/clang/include/clang/AST/DeclCXX.h +++ b/tools/clang/include/clang/AST/DeclCXX.h @@ -465,6 +465,10 @@ class CXXRecordDecl : public RecordDecl { /// \brief Whether we are currently parsing base specifiers. bool IsParsingBaseSpecifiers : 1; + /// \brief Whether this class contains at least one member or base + /// class containing an HLSL vector longer than 4 elements. + bool HasHLSLLongVector : 1; + /// \brief The number of base class specifiers in Bases. unsigned NumBases; @@ -1018,6 +1022,13 @@ class CXXRecordDecl : public RecordDecl { return data().NeedOverloadResolutionForDestructor; } + // HLSL Change add HLSL Long vector bit. + /// \brief Determine whether this class contains an HLSL long vector + /// of over 4 elements. 
+ bool hasHLSLLongVector() { return data().HasHLSLLongVector; } + /// \brief Set that this class contains an HLSL long vector of over 4 elements + bool setHasHLSLLongVector() { return data().HasHLSLLongVector = true; } + /// \brief Determine whether this class describes a lambda function object. bool isLambda() const { // An update record can't turn a non-lambda into a lambda. diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 2aa9afa5f9..e6a50de8fb 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -348,9 +348,10 @@ void AddHLSLNodeOutputRecordTemplate( _Outptr_ clang::ClassTemplateDecl **outputRecordTemplateDecl, bool isCompleteType = true); -clang::CXXRecordDecl *DeclareRecordTypeWithHandle(clang::ASTContext &context, - llvm::StringRef name, - bool isCompleteType = true); +clang::CXXRecordDecl * +DeclareRecordTypeWithHandle(clang::ASTContext &context, llvm::StringRef name, + bool isCompleteType = true, + clang::InheritableAttr *Attr = nullptr); void AddRaytracingConstants(clang::ASTContext &context); void AddSamplerFeedbackConstants(clang::ASTContext &context); @@ -381,14 +382,14 @@ clang::CXXRecordDecl *DeclareTemplateTypeWithHandleInDeclContext( clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandle( clang::ASTContext &context, llvm::StringRef typeName, - llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); + llvm::StringRef templateParamName, clang::InheritableAttr *Attr = nullptr); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, llvm::StringRef typeName, llvm::StringRef templateParamName, - clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); -clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext &context, - bool bTBuf); + clang::InheritableAttr *Attr = nullptr); +clang::CXXRecordDecl * 
+DeclareConstantBufferViewType(clang::ASTContext &context, + clang::InheritableAttr *Attr); clang::CXXRecordDecl *DeclareRayQueryType(clang::ASTContext &context); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 3a6718a339..9e48df51fd 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -939,6 +939,52 @@ def HLSLCXXOverload : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLVector : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLMatrix : InheritableAttr { + let Spellings = []; // No spellings! + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLTessPatch : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [BoolArgument<"IsInput">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLStreamOutput : InheritableAttr { + let Spellings = []; // No spellings! + // PrimVertices are the number of vertices that make up the streamed + // primitive. Points have 1. Lines have 2. Triangles have 3. + let Args = [UnsignedArgument<"PrimVertices">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + +def HLSLResource : InheritableAttr { + let Spellings = []; // No spellings! + let Args = [UnsignedArgument<"ResKindUint">, + UnsignedArgument<"ResClassUint">]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; + + // Add enum typed getters for safety and brevity. 
+ let AdditionalMembers = [{ + hlsl::DXIL::ResourceKind getResKind() const { + return (hlsl::DXIL::ResourceKind)getResKindUint(); + } + hlsl::DXIL::ResourceClass getResClass() const { + return (hlsl::DXIL::ResourceClass)getResClassUint(); + } + }]; +} + def HLSLNodeLaunch : InheritableAttr { let Spellings = [CXX11<"", "nodelaunch", 2017>]; let Args = [StringArgument<"LaunchType">]; // one of broadcasting, coalescing, thread @@ -992,13 +1038,6 @@ def HLSLNodeTrackRWInputSharing : InheritableAttr { let Documentation = [Undocumented]; } -def HLSLResource : InheritableAttr { - let Spellings = []; // No spellings! - let Args = [UnsignedArgument<"ResKind">, UnsignedArgument<"ResClass">]; - let Subjects = SubjectList<[CXXRecord]>; - let Documentation = [Undocumented]; -} - def HLSLNodeObject : InheritableAttr { let Spellings = []; // No spellings! let Subjects = SubjectList<[CXXRecord]>; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 99b6534e1f..d4ebb2ce21 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7519,8 +7519,8 @@ def err_hlsl_half_load_store: Error< "LoadHalf and StoreHalf are not supported for min precision mode">; def err_hlsl_interfaces_cannot_inherit: Error< "interfaces cannot inherit from other types">; -def err_hlsl_invalid_range_1_4: Error< - "invalid value, valid range is between 1 and 4 inclusive">; +def err_hlsl_invalid_range_1_to_max + : Error<"invalid value, valid range is between 1 and %0 inclusive">; def err_hlsl_matrix_member_bad_format: Error< "invalid format for matrix subscript '%0'">; def err_hlsl_matrix_member_empty: Error< @@ -7549,6 +7549,8 @@ def err_hlsl_vector_element_index_out_of_bounds: Error< "vector element index '%0' is out of bounds">; def err_hlsl_vector_member_too_many_positions: Error< "more than four positions are referenced in '%0'">; +def 
err_hlsl_vector_member_on_long_vector: Error< + "Invalid swizzle '%0' on vector of over 4 elements.">; def err_hlsl_missing_type_specifier : Error< // Patterened after err_missing_type_specifier "HLSL requires a type specifier for all declarations">; def err_hlsl_multiple_concrete_bases : Error< @@ -7701,8 +7703,6 @@ def err_hlsl_control_flow_cond_not_scalar : Error< "%0 statement conditional expressions must evaluate to a scalar">; def err_hlsl_unsupportedvectortype : Error< "%0 is declared with type %1, but only primitive scalar values are supported">; -def err_hlsl_unsupportedvectorsize : Error< - "%0 is declared with size %1, but only values 1 through 4 are supported">; def err_hlsl_unsupportedmatrixsize : Error< "%0 is declared with size %1x%2, but only values 1 through 4 are supported">; def err_hlsl_norm_float_only : Error< @@ -7853,6 +7853,8 @@ def err_hlsl_load_from_mesh_out_arrays: Error< "output arrays of a mesh shader can not be read from">; def err_hlsl_out_indices_array_incorrect_access: Error< "a vector in out indices array must be accessed as a whole">; +def err_hlsl_unsupported_long_vector + : Error<"vectors of over 4 elements in %0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/include/clang/Basic/LangOptions.h b/tools/clang/include/clang/Basic/LangOptions.h index 8dc15da5d8..433b767c8d 100644 --- a/tools/clang/include/clang/Basic/LangOptions.h +++ b/tools/clang/include/clang/Basic/LangOptions.h @@ -15,7 +15,7 @@ #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H #define LLVM_CLANG_BASIC_LANGOPTIONS_H -#include "dxc/DXIL/DxilConstants.h" // For DXIL::DefaultLinkage +#include "dxc/DXIL/DxilConstants.h" // For DXIL:: default values. 
#include "dxc/Support/HLSLVersion.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/LLVM.h" @@ -168,6 +168,7 @@ class LangOptions : public LangOptionsBase { hlsl::DXIL::DefaultLinkage::Default; /// Whether use row major as default matrix major. bool HLSLDefaultRowMajor = false; + unsigned MaxHLSLVectorLength = hlsl::DXIL::kDefaultMaxVectorLength; // HLSL Change Ends bool SPIRV = false; // SPIRV Change diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 40b030b430..7e7400d390 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -128,6 +128,8 @@ unsigned CaculateInitListArraySizeForHLSL(clang::Sema *sema, const clang::InitListExpr *InitList, const clang::QualType EltTy); +bool ContainsLongVector(clang::QualType qt); + bool IsConversionToLessOrEqualElements(clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 02125d5a84..870d032d39 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -329,6 +329,9 @@ void hlsl::AddHLSLMatrixTemplate(ASTContext &context, typeDeclBuilder.addField("h", vectorArrayType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLMatrixAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to rowcount-1, and returns // a vector of colcount elements. const unsigned int templateDepth = 0; @@ -385,6 +388,9 @@ void hlsl::AddHLSLVectorTemplate(ASTContext &context, // Add an 'h' field to hold the handle. typeDeclBuilder.addField("h", vectorType); + typeDeclBuilder.getRecordDecl()->addAttr( + HLSLVectorAttr::CreateImplicit(context)); + // Add an operator[]. The operator ranges from zero to colcount-1, and returns // a scalar. 
@@ -525,11 +531,15 @@ hlsl::DeclareRecordTypeWithHandleAndNoMemberFunctions(ASTContext &context, /// CXXRecordDecl * hlsl::DeclareRecordTypeWithHandle(ASTContext &context, StringRef name, - bool isCompleteType /*= true */) { + bool isCompleteType /*= true */, + InheritableAttr *Attr) { BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), name, TagDecl::TagKind::TTK_Struct); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + if (isCompleteType) return typeDeclBuilder.completeDefinition(); return typeDeclBuilder.getRecordDecl(); @@ -915,6 +925,7 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef name, uint8_t templateArgCount, TypeSourceInfo *defaultTypeArgValue, InheritableAttr *Attr) { + DXASSERT(templateArgCount != 0, "otherwise caller should be creating a class or struct"); DXASSERT(templateArgCount <= 2, "otherwise the function needs to be updated " @@ -938,11 +949,9 @@ CXXRecordDecl *hlsl::DeclareTemplateTypeWithHandleInDeclContext( QualType elementType = context.getTemplateTypeParmType( /*templateDepth*/ 0, 0, ParameterPackFalse, elementTemplateParamDecl); - if (templateArgCount > 1 && - // Only need array type for inputpatch and outputpatch. - // Avoid Texture2DMS which may use 0 count. - // TODO: use hlsl types to do the check. - !name.startswith("Texture") && !name.startswith("RWTexture")) { + // Only need array type for inputpatch and outputpatch. 
+ if (Attr && isa(Attr)) { + DXASSERT(templateArgCount == 2, "Tess patches need 2 template params"); Expr *countExpr = DeclRefExpr::Create( context, NestedNameSpecifierLoc(), NoLoc, countTemplateParamDecl, false, DeclarationNameInfo(countTemplateParamDecl->getDeclName(), NoLoc), @@ -1098,41 +1107,50 @@ CXXMethodDecl *hlsl::CreateObjectFunctionDeclarationWithParams( CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandle( ASTContext &context, StringRef typeName, StringRef templateParamName, - TagTypeKind tagKind) { + InheritableAttr *Attr) { return DeclareUIntTemplatedTypeWithHandleInDeclContext( context, context.getTranslationUnitDecl(), typeName, templateParamName, - tagKind); + Attr); } CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, - StringRef templateParamName, TagTypeKind tagKind) { + StringRef templateParamName, InheritableAttr *Attr) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, tagKind); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, + TagTypeKind::TTK_Class); typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + if (Attr) + typeDeclBuilder.getRecordDecl()->addAttr(Attr); + return typeDeclBuilder.getRecordDecl(); } clang::CXXRecordDecl * -hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, bool bTBuf) { +hlsl::DeclareConstantBufferViewType(clang::ASTContext &context, + InheritableAttr *Attr) { // Create ConstantBufferView template declaration in translation unit scope // like other resource. // template ConstantBuffer { int h; } DeclContext *DC = context.getTranslationUnitDecl(); + DXASSERT(Attr, "Constbuffer types require an attribute"); - BuiltinTypeDeclBuilder typeDeclBuilder( - DC, bTBuf ? 
"TextureBuffer" : "ConstantBuffer", - TagDecl::TagKind::TTK_Struct); + const char *TypeName = "ConstantBuffer"; + if (IsTBuffer(cast(Attr)->getResKind())) + TypeName = "TextureBuffer"; + BuiltinTypeDeclBuilder typeDeclBuilder(DC, TypeName, + TagDecl::TagKind::TTK_Struct); (void)typeDeclBuilder.addTypeTemplateParam("T"); typeDeclBuilder.startDefinition(); CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); typeDeclBuilder.addField( "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + typeDeclBuilder.getRecordDecl()->addAttr(Attr); typeDeclBuilder.getRecordDecl(); diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index 9ef771b932..5f8c186919 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -48,34 +48,31 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const { } CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) - : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), - Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), - Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), - HasPrivateFields(false), HasProtectedFields(false), HasPublicFields(false), - HasMutableFields(false), HasVariantMembers(false), HasOnlyCMembers(true), - HasInClassInitializer(false), HasUninitializedReferenceMember(false), - NeedOverloadResolutionForMoveConstructor(false), - NeedOverloadResolutionForMoveAssignment(false), - NeedOverloadResolutionForDestructor(false), - DefaultedMoveConstructorIsDeleted(false), - DefaultedMoveAssignmentIsDeleted(false), - DefaultedDestructorIsDeleted(false), - HasTrivialSpecialMembers(SMF_All), - DeclaredNonTrivialSpecialMembers(0), - HasIrrelevantDestructor(true), - HasConstexprNonCopyMoveConstructor(false), - DefaultedDefaultConstructorIsConstexpr(true), - HasConstexprDefaultConstructor(false), - HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), - 
UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), - ImplicitCopyConstructorHasConstParam(true), - ImplicitCopyAssignmentHasConstParam(true), - HasDeclaredCopyConstructorWithConstParam(false), - HasDeclaredCopyAssignmentWithConstParam(false), - IsLambda(false), IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), - Bases(), VBases(), - Definition(D), FirstFriend() { -} + : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0), + Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false), + Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), + HasPrivateFields(false), HasProtectedFields(false), + HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false), + HasOnlyCMembers(true), HasInClassInitializer(false), + HasUninitializedReferenceMember(false), + NeedOverloadResolutionForMoveConstructor(false), + NeedOverloadResolutionForMoveAssignment(false), + NeedOverloadResolutionForDestructor(false), + DefaultedMoveConstructorIsDeleted(false), + DefaultedMoveAssignmentIsDeleted(false), + DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All), + DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true), + HasConstexprNonCopyMoveConstructor(false), + DefaultedDefaultConstructorIsConstexpr(true), + HasConstexprDefaultConstructor(false), + HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), + UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), + ImplicitCopyConstructorHasConstParam(true), + ImplicitCopyAssignmentHasConstParam(true), + HasDeclaredCopyConstructorWithConstParam(false), + HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), + IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), + NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return Bases.get(Definition->getASTContext().getExternalSource()); @@ -204,6 +201,10 @@ 
CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; + // Propagate presence of long vector to child classes. + if (BaseClassDecl->hasHLSLLongVector()) + data().HasHLSLLongVector = true; + // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) data().HasNonLiteralTypeFieldsOrBases = true; @@ -385,6 +386,9 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } + + if (Subobj->hasHLSLLongVector()) + data().HasHLSLLongVector = true; } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index d83b307463..41175e3d37 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -53,44 +53,44 @@ ConvertHLSLVecMatTypeToExtVectorType(const clang::ASTContext &context, return nullptr; } +template static AttrType *getAttr(clang::QualType type) { + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr(); + } + return nullptr; +} + bool IsHLSLVecMatType(clang::QualType type) { - const Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } else if (templateDecl->getName() == "matrix") { - return true; - } - } + type = type.getCanonicalType(); + if (const RecordType *RT = type->getAs()) { + if (const auto *Spec = + 
dyn_cast(RT->getDecl())) + if (const auto *Template = + dyn_cast(Spec->getSpecializedTemplate())) + return Template->getTemplatedDecl()->getAttr() || + Template->getTemplatedDecl()->getAttr(); + if (const auto *Decl = dyn_cast(RT->getDecl())) + return Decl->getAttr() || Decl->getAttr(); } return false; } bool IsHLSLMatType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "matrix") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } bool IsHLSLVecType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); - if (const RecordType *RT = dyn_cast(Ty)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast(RT->getDecl())) { - if (templateDecl->getName() == "vector") { - return true; - } - } - } + if (getAttr(type)) + return true; return false; } @@ -474,160 +474,52 @@ clang::QualType GetHLSLMatElementType(clang::QualType type) { QualType elemTy = arg0.getAsType(); return elemTy; } + // TODO: Add type cache to ASTContext. 
bool IsHLSLInputPatchType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "InputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return Attr->getIsInput(); return false; } + bool IsHLSLOutputPatchType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "OutputPatch") { - return true; - } - } - } + if (const HLSLTessPatchAttr *Attr = getAttr(type)) + return !Attr->getIsInput(); return false; } + bool IsHLSLPointStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Point; return false; } + bool IsHLSLLineStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "LineStream") - return true; - } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Line; return false; } + bool IsHLSLTriangleStreamType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "TriangleStream") - return true; 
- } - } + if (const HLSLStreamOutputAttr *Attr = getAttr(type)) + return Attr->getPrimVertices() == (unsigned)DXIL::InputPrimitive::Triangle; return false; } + bool IsHLSLStreamOutputType(QualType type) { - type = type.getCanonicalType(); - if (const RecordType *RT = dyn_cast(type)) { - if (const ClassTemplateSpecializationDecl *templateDecl = - dyn_cast( - RT->getAsCXXRecordDecl())) { - if (templateDecl->getName() == "PointStream") - return true; - if (templateDecl->getName() == "LineStream") - return true; - if (templateDecl->getName() == "TriangleStream") - return true; - } - } + if (getAttr(type)) + return true; return false; } -bool IsHLSLResourceType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "Texture1D" || name == "RWTexture1D") - return true; - if (name == "Texture2D" || name == "RWTexture2D") - return true; - if (name == "Texture2DMS" || name == "RWTexture2DMS") - return true; - if (name == "Texture3D" || name == "RWTexture3D") - return true; - if (name == "TextureCube" || name == "RWTextureCube") - return true; - - if (name == "Texture1DArray" || name == "RWTexture1DArray") - return true; - if (name == "Texture2DArray" || name == "RWTexture2DArray") - return true; - if (name == "Texture2DMSArray" || name == "RWTexture2DMSArray") - return true; - if (name == "TextureCubeArray" || name == "RWTextureCubeArray") - return true; - - if (name == "FeedbackTexture2D" || name == "FeedbackTexture2DArray") - return true; - - if (name == "RasterizerOrderedTexture1D" || - name == "RasterizerOrderedTexture2D" || - name == "RasterizerOrderedTexture3D" || - name == "RasterizerOrderedTexture1DArray" || - name == "RasterizerOrderedTexture2DArray" || - name == "RasterizerOrderedBuffer" || - name == "RasterizerOrderedByteAddressBuffer" || - name == "RasterizerOrderedStructuredBuffer") - return true; - - if (name == "ByteAddressBuffer" || name == "RWByteAddressBuffer") - return true; - - if 
(name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - - if (name == "Buffer" || name == "RWBuffer") - return true; - - if (name == "SamplerState" || name == "SamplerComparisonState") - return true; - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - - if (name == "RaytracingAccelerationStructure") - return true; - } +bool IsHLSLResourceType(clang::QualType type) { + if (getAttr(type)) + return true; return false; } -static HLSLNodeObjectAttr *getNodeAttr(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - if (const auto *Spec = - dyn_cast(RT->getDecl())) - if (const auto *Template = - dyn_cast(Spec->getSpecializedTemplate())) - return Template->getTemplatedDecl()->getAttr(); - if (const auto *Decl = dyn_cast(RT->getDecl())) - return Decl->getAttr(); - } - return nullptr; -} - DXIL::NodeIOKind GetNodeIOType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return Attr->getNodeIOType(); return DXIL::NodeIOKind::Invalid; } @@ -654,27 +546,20 @@ bool IsHLSLDynamicSamplerType(clang::QualType type) { } bool IsHLSLNodeType(clang::QualType type) { - if (const HLSLNodeObjectAttr *Attr = getNodeAttr(type)) + if (const HLSLNodeObjectAttr *Attr = getAttr(type)) return true; return false; } bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - // Read-only records - if (name == 
"ConstantBuffer" || name == "TextureBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return DXIL::IsCTBuffer(Attr->getResKind()); return false; } @@ -701,14 +586,8 @@ bool IsHLSLNodeOutputType(clang::QualType type) { } bool IsHLSLStructuredBufferType(clang::QualType type) { - if (const RecordType *RT = type->getAs()) { - StringRef name = RT->getDecl()->getName(); - if (name == "StructuredBuffer" || name == "RWStructuredBuffer") - return true; - - if (name == "AppendStructuredBuffer" || name == "ConsumeStructuredBuffer") - return true; - } + if (const HLSLResourceAttr *Attr = getAttr(type)) + return Attr->getResKind() == DXIL::ResourceKind::StructuredBuffer; return false; } @@ -914,7 +793,8 @@ QualType GetHLSLResourceResultType(QualType type) { if (const ClassTemplateSpecializationDecl *templateDecl = dyn_cast(RD)) { - if (RD->getName().startswith("FeedbackTexture")) { + const HLSLResourceAttr *Attr = getAttr(type); + if (Attr && DXIL::IsFeedbackTexture(Attr->getResKind())) { // Feedback textures are write-only and the data is opaque, // so there is no result type per se. 
return {}; diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp index 0cb993e6f4..530c791fcc 100644 --- a/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -3713,20 +3713,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); llvm::Value *LHS = Visit(lhsExpr); llvm::Value *RHS = Visit(rhsExpr); - if (llvm::VectorType *VT = dyn_cast(CondV->getType())) { - llvm::VectorType *ResultVT = cast(LHS->getType()); - llvm::Value *result = llvm::UndefValue::get(ResultVT); - for (unsigned i = 0; i < VT->getNumElements(); i++) { - llvm::Value *EltCond = Builder.CreateExtractElement(CondV, i); - llvm::Value *EltL = Builder.CreateExtractElement(LHS, i); - llvm::Value *EltR = Builder.CreateExtractElement(RHS, i); - llvm::Value *EltSelect = Builder.CreateSelect(EltCond, EltL, EltR); - result = Builder.CreateInsertElement(result, EltSelect, i); - } - return result; - } else { - return Builder.CreateSelect(CondV, LHS, RHS); - } + return Builder.CreateSelect(CondV, LHS, RHS); } if (hlsl::IsHLSLMatType(E->getType())) { llvm::Value *Cond = CGF.EmitScalarExpr(condExpr); diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 6d838fb203..73ea9dd93c 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -810,6 +810,12 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } + if (ContainsLongVector(Payload->getType())) { + S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "payload parameters"; + return; + } + CollectNonAccessableFields(PayloadType, CallerStage, {}, {}, NonWriteableFields, NonReadableFields); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fb3937cfd5..555b0ba44a 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -363,6 +363,8 @@ enum 
ArBasicKind { #define IS_BPROP_STREAM(_Props) (((_Props)&BPROP_STREAM) != 0) +#define IS_BPROP_PATCH(_Props) (((_Props) & BPROP_PATCH) != 0) + #define IS_BPROP_SAMPLER(_Props) (((_Props)&BPROP_SAMPLER) != 0) #define IS_BPROP_TEXTURE(_Props) (((_Props)&BPROP_TEXTURE) != 0) @@ -616,6 +618,8 @@ C_ASSERT(ARRAYSIZE(g_uBasicKindProps) == AR_BASIC_MAXIMUM_COUNT); #define IS_BASIC_STREAM(_Kind) IS_BPROP_STREAM(GetBasicKindProps(_Kind)) +#define IS_BASIC_PATCH(_Kind) IS_BPROP_PATCH(GetBasicKindProps(_Kind)) + #define IS_BASIC_SAMPLER(_Kind) IS_BPROP_SAMPLER(GetBasicKindProps(_Kind)) #define IS_BASIC_TEXTURE(_Kind) IS_BPROP_TEXTURE(GetBasicKindProps(_Kind)) #define IS_BASIC_OBJECT(_Kind) IS_BPROP_OBJECT(GetBasicKindProps(_Kind)) @@ -1014,12 +1018,13 @@ static const ArBasicKind g_UIntCT[] = {AR_BASIC_UINT32, AR_BASIC_LITERAL_INT, // should map to int32, not int16 or int64 static const ArBasicKind g_AnyIntCT[] = { AR_BASIC_INT32, AR_BASIC_INT16, AR_BASIC_UINT32, AR_BASIC_UINT16, - AR_BASIC_INT64, AR_BASIC_UINT64, AR_BASIC_LITERAL_INT, AR_BASIC_UNKNOWN}; + AR_BASIC_INT64, AR_BASIC_UINT64, AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, + AR_BASIC_LITERAL_INT, AR_BASIC_UNKNOWN}; static const ArBasicKind g_AnyInt32CT[] = { - AR_BASIC_INT32, AR_BASIC_UINT32, AR_BASIC_LITERAL_INT, AR_BASIC_UNKNOWN}; + AR_BASIC_INT32, AR_BASIC_UINT32, AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_LITERAL_INT, AR_BASIC_UNKNOWN}; -static const ArBasicKind g_UIntOnlyCT[] = {AR_BASIC_UINT32, AR_BASIC_UINT64, +static const ArBasicKind g_UIntOnlyCT[] = {AR_BASIC_UINT32, AR_BASIC_UINT64,AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_LITERAL_INT, AR_BASIC_NOCAST, AR_BASIC_UNKNOWN}; @@ -1059,18 +1064,20 @@ static const ArBasicKind g_NumericCT[] = { AR_BASIC_UINT16, AR_BASIC_UINT32, AR_BASIC_MIN12INT, AR_BASIC_MIN16INT, AR_BASIC_MIN16UINT, AR_BASIC_INT64, - AR_BASIC_UINT64, AR_BASIC_UNKNOWN}; + AR_BASIC_UINT64, AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_UNKNOWN}; static const 
ArBasicKind g_Numeric32CT[] = { AR_BASIC_FLOAT32, AR_BASIC_FLOAT32_PARTIAL_PRECISION, AR_BASIC_LITERAL_FLOAT, AR_BASIC_LITERAL_INT, AR_BASIC_INT32, AR_BASIC_UINT32, +AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_UNKNOWN}; static const ArBasicKind g_Numeric32OnlyCT[] = { AR_BASIC_FLOAT32, AR_BASIC_FLOAT32_PARTIAL_PRECISION, AR_BASIC_LITERAL_FLOAT, AR_BASIC_LITERAL_INT, AR_BASIC_INT32, AR_BASIC_UINT32, +AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_NOCAST, AR_BASIC_UNKNOWN}; static const ArBasicKind g_AnyCT[] = { @@ -1083,6 +1090,7 @@ static const ArBasicKind g_AnyCT[] = { AR_BASIC_MIN12INT, AR_BASIC_MIN16INT, AR_BASIC_MIN16UINT, AR_BASIC_BOOL, AR_BASIC_INT64, AR_BASIC_UINT64, +AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_UNKNOWN}; static const ArBasicKind g_AnySamplerCT[] = { @@ -1146,6 +1154,7 @@ static const ArBasicKind g_Numeric16OnlyCT[] = { AR_BASIC_UNKNOWN}; static const ArBasicKind g_Int32OnlyCT[] = {AR_BASIC_INT32, AR_BASIC_UINT32, +AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_LITERAL_INT, AR_BASIC_NOCAST, AR_BASIC_UNKNOWN}; @@ -1170,10 +1179,13 @@ static const ArBasicKind g_UInt8_4PackedCT[] = { static const ArBasicKind g_AnyInt16Or32CT[] = { AR_BASIC_INT32, AR_BASIC_UINT32, AR_BASIC_INT16, - AR_BASIC_UINT16, AR_BASIC_LITERAL_INT, AR_BASIC_UNKNOWN}; + AR_BASIC_UINT16, +AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, AR_BASIC_LITERAL_INT, AR_BASIC_UNKNOWN}; static const ArBasicKind g_SInt16Or32OnlyCT[] = { - AR_BASIC_INT32, AR_BASIC_INT16, AR_BASIC_LITERAL_INT, AR_BASIC_NOCAST, + AR_BASIC_INT32, AR_BASIC_INT16, AR_BASIC_LITERAL_INT, +AR_BASIC_INT8_4PACKED, AR_BASIC_UINT8_4PACKED, +AR_BASIC_NOCAST, AR_BASIC_UNKNOWN}; static const ArBasicKind g_ByteAddressBufferCT[] = { @@ -2856,8 +2868,9 @@ class HLSLExternalSource : public ExternalSemaSource { TypedefDecl *m_matrixShorthandTypes[HLSLScalarTypeCount][4][4]; // Vector types already built. 
- QualType m_vectorTypes[HLSLScalarTypeCount][4]; - TypedefDecl *m_vectorTypedefs[HLSLScalarTypeCount][4]; + QualType m_vectorTypes[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; + TypedefDecl + *m_vectorTypedefs[HLSLScalarTypeCount][DXIL::kDefaultMaxVectorLength]; // BuiltinType for each scalar type. QualType m_baseTypes[HLSLScalarTypeCount]; @@ -3540,6 +3553,20 @@ class HLSLExternalSource : public ExternalSemaSource { if (kind == AR_OBJECT_LEGACY_EFFECT) effectKindIndex = i; + InheritableAttr *Attr = nullptr; + if (IS_BASIC_STREAM(kind)) + Attr = HLSLStreamOutputAttr::CreateImplicit( + *m_context, kind - AR_OBJECT_POINTSTREAM + 1); + else if (IS_BASIC_PATCH(kind)) + Attr = HLSLTessPatchAttr::CreateImplicit(*m_context, + kind == AR_OBJECT_INPUTPATCH); + else { + DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; + DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; + if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) + Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, + (unsigned)ResClass); + } DXASSERT(kind < _countof(g_ArBasicTypeNames), "g_ArBasicTypeNames has the wrong number of entries"); assert(kind < _countof(g_ArBasicTypeNames)); @@ -3586,9 +3613,9 @@ class HLSLExternalSource : public ExternalSemaSource { break; } } else if (kind == AR_OBJECT_CONSTANT_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ false); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_TEXTURE_BUFFER) { - recordDecl = DeclareConstantBufferViewType(*m_context, /*bTBuf*/ true); + recordDecl = DeclareConstantBufferViewType(*m_context, Attr); } else if (kind == AR_OBJECT_RAY_QUERY) { recordDecl = DeclareRayQueryType(*m_context); } else if (kind == AR_OBJECT_HEAP_RESOURCE) { @@ -3609,10 +3636,10 @@ class HLSLExternalSource : public ExternalSemaSource { } } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - 
*m_context, "FeedbackTexture2D", "kind"); + *m_context, "FeedbackTexture2D", "kind", Attr); } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D_ARRAY) { recordDecl = DeclareUIntTemplatedTypeWithHandle( - *m_context, "FeedbackTexture2DArray", "kind"); + *m_context, "FeedbackTexture2DArray", "kind", Attr); } else if (kind == AR_OBJECT_EMPTY_NODE_INPUT) { recordDecl = DeclareNodeOrRecordType( *m_context, DXIL::NodeIOKind::EmptyInput, @@ -3728,20 +3755,12 @@ class HLSLExternalSource : public ExternalSemaSource { } #endif else if (templateArgCount == 0) { - recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, - /*isCompleteType*/ false); + recordDecl = + DeclareRecordTypeWithHandle(*m_context, typeName, + /*isCompleteType*/ false, Attr); } else { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); - - InheritableAttr *Attr = nullptr; - DXIL::ResourceKind ResKind = DXIL::ResourceKind::NumEntries; - DXIL::ResourceClass ResClass = DXIL::ResourceClass::Invalid; - if (GetBasicKindResourceKindAndClass(kind, ResKind, ResClass)) { - Attr = HLSLResourceAttr::CreateImplicit(*m_context, (unsigned)ResKind, - (unsigned)ResClass); - } - TypeSourceInfo *typeDefault = TemplateHasDefaultType(kind) ? 
float4TypeSourceInfo : nullptr; recordDecl = DeclareTemplateTypeWithHandle( @@ -3830,7 +3849,7 @@ class HLSLExternalSource : public ExternalSemaSource { clang::TypedefDecl *LookupVectorShorthandType(HLSLScalarType scalarType, UINT colCount) { DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown && - colCount <= 4); + colCount <= DXIL::kDefaultMaxVectorLength); TypedefDecl *qts = m_vectorTypedefs[scalarType][colCount - 1]; if (qts == nullptr) { QualType type = LookupVectorType(scalarType, colCount); @@ -3937,7 +3956,9 @@ class HLSLExternalSource : public ExternalSemaSource { } QualType LookupVectorType(HLSLScalarType scalarType, unsigned int colCount) { - QualType qt = m_vectorTypes[scalarType][colCount - 1]; + QualType qt; + if (colCount < DXIL::kDefaultMaxVectorLength) + qt = m_vectorTypes[scalarType][colCount - 1]; if (qt.isNull()) { if (m_scalarTypes[scalarType].isNull()) { LookupScalarTypeDef(scalarType); @@ -3945,7 +3966,8 @@ class HLSLExternalSource : public ExternalSemaSource { qt = GetOrCreateVectorSpecialization(*m_context, m_sema, m_vectorTemplateDecl, m_scalarTypes[scalarType], colCount); - m_vectorTypes[scalarType][colCount - 1] = qt; + if (colCount < DXIL::kDefaultMaxVectorLength) + m_vectorTypes[scalarType][colCount - 1] = qt; } return qt; } @@ -4741,7 +4763,7 @@ class HLSLExternalSource : public ExternalSemaSource { return true; case AR_OBJECT_TEXTURE_BUFFER: ResKind = DXIL::ResourceKind::TBuffer; - ResClass = DXIL::ResourceClass::CBuffer; + ResClass = DXIL::ResourceClass::SRV; return true; case AR_OBJECT_FEEDBACKTEXTURE2D: ResKind = DXIL::ResourceKind::FeedbackTexture2D; @@ -4751,6 +4773,15 @@ class HLSLExternalSource : public ExternalSemaSource { ResKind = DXIL::ResourceKind::FeedbackTexture2DArray; ResClass = DXIL::ResourceClass::SRV; return true; + case AR_OBJECT_SAMPLER: + case AR_OBJECT_SAMPLERCOMPARISON: + ResKind = DXIL::ResourceKind::Sampler; + ResClass = DXIL::ResourceClass::Sampler; + return true; + case 
AR_OBJECT_ACCELERATION_STRUCT: + ResKind = DXIL::ResourceKind::RTAccelerationStructure; + ResClass = DXIL::ResourceClass::SRV; + return true; default: return false; } @@ -5033,10 +5064,6 @@ class HLSLExternalSource : public ExternalSemaSource { AR_BASIC_UNKNOWN; } - /// Checks whether the specified value is a valid vector - /// size. - bool IsValidVectorSize(size_t length) { return 1 <= length && length <= 4; } - /// Checks whether the specified value is a valid matrix row or /// column size. bool IsValidMatrixColOrRowSize(size_t length) { @@ -5072,11 +5099,6 @@ class HLSLExternalSource : public ExternalSemaSource { false); } else if (objectKind == AR_TOBJ_VECTOR) { bool valid = true; - if (!IsValidVectorSize(GetHLSLVecSize(type))) { - valid = false; - m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectorsize) - << type << GetHLSLVecSize(type); - } if (!IsScalarType(GetMatrixOrVectorElementType(type))) { valid = false; m_sema->Diag(argLoc, diag::err_hlsl_unsupportedvectortype) @@ -5194,9 +5216,13 @@ class HLSLExternalSource : public ExternalSemaSource { SourceLocation Loc); bool CheckRangedTemplateArgument(SourceLocation diagLoc, - llvm::APSInt &sintValue) { - if (!sintValue.isStrictlyPositive() || sintValue.getLimitedValue() > 4) { - m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_4); + llvm::APSInt &sintValue, bool IsVector) { + unsigned MaxLength = DXIL::kDefaultMaxVectorLength; + if (IsVector) + MaxLength = m_sema->getLangOpts().MaxHLSLVectorLength; + if (!sintValue.isStrictlyPositive() || + sintValue.getLimitedValue() > MaxLength) { + m_sema->Diag(diagLoc, diag::err_hlsl_invalid_range_1_to_max) << MaxLength; return true; } @@ -5219,11 +5245,14 @@ class HLSLExternalSource : public ExternalSemaSource { return false; } // Allow object type for Constant/TextureBuffer. 
- if (templateName == "ConstantBuffer" || templateName == "TextureBuffer") { + HLSLResourceAttr *ResAttr = + Template->getTemplatedDecl()->getAttr(); + if (ResAttr && DXIL::IsCTBuffer(ResAttr->getResKind())) { if (TemplateArgList.size() == 1) { const TemplateArgumentLoc &argLoc = TemplateArgList[0]; const TemplateArgument &arg = argLoc.getArgument(); - DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, ""); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "cbuffer with non-type template arg"); QualType argType = arg.getAsType(); SourceLocation argSrcLoc = argLoc.getLocation(); if (IsScalarType(argType) || IsVectorType(m_sema, argType) || @@ -5233,23 +5262,13 @@ class HLSLExternalSource : public ExternalSemaSource { << argType; return true; } - if (auto *TST = dyn_cast(argType)) { - // This is a bit of a special case we need to handle. Because the - // buffer types don't use their template parameter in a way that would - // force instantiation, we need to force specialization here. - GetOrCreateTemplateSpecialization( - *m_context, *m_sema, - cast( - TST->getTemplateName().getAsTemplateDecl()), - llvm::ArrayRef(TST->getArgs(), - TST->getNumArgs())); - } - if (const RecordType *recordType = argType->getAs()) { - if (!recordType->getDecl()->isCompleteDefinition()) { - m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) - << argType; - return true; - } + m_sema->RequireCompleteType(argSrcLoc, argType, + diag::err_typecheck_decl_incomplete_type); + + if (ContainsLongVector(argType)) { + m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) + << "ConstantBuffers or TextureBuffers"; + return true; } } return false; @@ -5279,22 +5298,13 @@ class HLSLExternalSource : public ExternalSemaSource { // template instantiation. 
if (ArgTy->isDependentType()) return false; - if (auto *recordType = ArgTy->getAs()) { - if (CXXRecordDecl *cxxRecordDecl = - dyn_cast(recordType->getDecl())) { - if (ClassTemplateSpecializationDecl *templateSpecializationDecl = - dyn_cast(cxxRecordDecl)) { - if (templateSpecializationDecl->getSpecializationKind() == - TSK_Undeclared) { - // Make sure specialization is done before IsTypeNumeric. - // If not, ArgTy might be treat as empty struct. - m_sema->RequireCompleteType( - ArgLoc.getLocation(), ArgTy, - diag::err_typecheck_decl_incomplete_type); - } - } - } - } + // Make sure specialization is done before IsTypeNumeric. + // If not, ArgTy might be treat as empty struct. + m_sema->RequireCompleteType(ArgLoc.getLocation(), ArgTy, + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = ArgTy->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; // The node record type must be compound - error if it is not. if (GetTypeObjectKind(ArgTy) != AR_TOBJ_COMPOUND) { m_sema->Diag(ArgLoc.getLocation(), diag::err_hlsl_node_record_type) @@ -5316,6 +5326,43 @@ class HLSLExternalSource : public ExternalSemaSource { return true; } return false; + } else if (Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Tessellation patch should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Tessellation patch requires type template arg 0"); + + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; + if (ContainsLongVector(arg.getAsType())) { + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "tessellation patches"; + return true; + } + } else if 
(Template->getTemplatedDecl()->hasAttr()) { + DXASSERT(TemplateArgList.size() > 0, + "Geometry streams should have at least one template args"); + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + DXASSERT(arg.getKind() == TemplateArgument::ArgKind::Type, + "Geometry stream requires type template arg 0"); + m_sema->RequireCompleteType(argLoc.getLocation(), arg.getAsType(), + diag::err_typecheck_decl_incomplete_type); + CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); + if (Decl && !Decl->isCompleteDefinition()) + return true; + if (ContainsLongVector(arg.getAsType())) { + m_sema->Diag(argLoc.getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "geometry streams"; + return true; + } } bool isMatrix = Template->getCanonicalDecl() == @@ -5337,9 +5384,7 @@ class HLSLExternalSource : public ExternalSemaSource { // NOTE: IsValidTemplateArgumentType emits its own diagnostics return true; } - HLSLResourceAttr *ResAttr = - Template->getTemplatedDecl()->getAttr(); - if (ResAttr && IsTyped((DXIL::ResourceKind)ResAttr->getResKind())) { + if (ResAttr && IsTyped(ResAttr->getResKind())) { // Check vectors for being too large. 
if (IsVectorType(m_sema, argType)) { unsigned NumElt = hlsl::GetElementCount(argType); @@ -5368,17 +5413,16 @@ class HLSLExternalSource : public ExternalSemaSource { llvm::APSInt constantResult; if (expr != nullptr && expr->isIntegerConstantExpr(constantResult, *m_context)) { - if (CheckRangedTemplateArgument(argSrcLoc, constantResult)) { + if (CheckRangedTemplateArgument(argSrcLoc, constantResult, + isVector)) return true; - } } } } else if (arg.getKind() == TemplateArgument::ArgKind::Integral) { if (isMatrix || isVector) { llvm::APSInt Val = arg.getAsIntegral(); - if (CheckRangedTemplateArgument(argSrcLoc, Val)) { + if (CheckRangedTemplateArgument(argSrcLoc, Val, isVector)) return true; - } } } } @@ -6631,6 +6675,9 @@ bool HLSLExternalSource::MatchArguments( } } + std::string profile = m_sema->getLangOpts().HLSLProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName(profile.c_str()); + // Populate argTypes. for (size_t i = 0; i <= Args.size(); i++) { const HLSL_INTRINSIC_ARGUMENT *pArgument = &pIntrinsic->pArgs[i]; @@ -6801,8 +6848,9 @@ bool HLSLExternalSource::MatchArguments( } // Verify that the final results are in bounds. 
- CAB(uCols > 0 && uCols <= MaxVectorSize && uRows > 0 && - uRows <= MaxVectorSize, + CAB((uCols > 0 && uRows > 0 && + ((uCols <= MaxVectorSize && uRows <= MaxVectorSize) || + (SM->IsSM69Plus() && uRows == 1))), i); // Const @@ -8571,6 +8619,10 @@ ExprResult HLSLExternalSource::LookupVectorMemberExprForHLSL( llvm_unreachable("Unknown VectorMemberAccessError value"); } + + if (colCount > 4) + msg = diag::err_hlsl_vector_member_on_long_vector; + if (msg != 0) { m_sema->Diag(MemberLoc, msg) << memberText; @@ -11581,10 +11633,17 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, HLSLExternalSource *source = HLSLExternalSource::FromSema(self); ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); switch (shapeKind) { + case AR_TOBJ_VECTOR: + if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { + self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) + << "node records"; + Empty = false; + return false; + } + LLVM_FALLTHROUGH; case AR_TOBJ_ARRAY: case AR_TOBJ_BASIC: case AR_TOBJ_MATRIX: - case AR_TOBJ_VECTOR: Empty = false; return false; case AR_TOBJ_OBJECT: @@ -11603,14 +11662,15 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, bool ErrorFound = false; const RecordDecl *RD = ArgTy->getAs()->getDecl(); // Check the fields of the RecordDecl - RecordDecl::field_iterator begin = RD->field_begin(); - RecordDecl::field_iterator end = RD->field_end(); - while (begin != end) { - const FieldDecl *FD = *begin; + for (auto *FD : RD->fields()) ErrorFound |= DiagnoseNodeStructArgument(self, ArgLoc, FD->getType(), Empty, FD); - begin++; - } + if (RD->isCompleteDefinition()) + if (auto *Child = dyn_cast(RD)) + // Walk up the inheritance chain and check base class fields + for (auto &B : Child->bases()) + ErrorFound |= + DiagnoseNodeStructArgument(self, ArgLoc, B.getType(), Empty); return ErrorFound; } default: @@ -12046,6 +12106,21 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, 
clang::QualType returnType, return false; } +bool hlsl::ContainsLongVector(QualType qt) { + if (qt.isNull() || qt->isDependentType()) + return false; + + while (const ArrayType *Arr = qt->getAsArrayTypeUnsafe()) + qt = Arr->getElementType(); + + if (CXXRecordDecl *Decl = qt->getAsCXXRecordDecl()) { + if (!Decl->isCompleteDefinition()) + return false; + return Decl->hasHLSLLongVector(); + } + return false; +} + bool hlsl::IsConversionToLessOrEqualElements( clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, bool explicitConversion) { @@ -14369,6 +14444,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, *pDispatchGrid = nullptr, *pMaxDispatchGrid = nullptr; bool usageIn = false; bool usageOut = false; + bool isGroupShared = false; for (clang::AttributeList *pAttr = D.getDeclSpec().getAttributes().getList(); pAttr != NULL; pAttr = pAttr->getNext()) { @@ -14392,6 +14468,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } break; case AttributeList::AT_HLSLGroupShared: + isGroupShared = true; if (!isGlobal) { Diag(pAttr->getLoc(), diag::err_hlsl_varmodifierna) << pAttr->getName() << declarationType << pAttr->getRange(); @@ -14672,6 +14749,22 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } + // Disallow long vecs from $Global cbuffers. + if (isGlobal && !isStatic && !isGroupShared) { + // Suppress actual emitting of errors for incompletable types here + // They are redundant to those produced in ActOnUninitializedDecl. 
+ struct SilentDiagnoser : public TypeDiagnoser { + SilentDiagnoser() : TypeDiagnoser(true) {} + virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} + } SD; + RequireCompleteType(D.getLocStart(), qt, SD); + if (ContainsLongVector(qt)) { + Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) + << "cbuffers or tbuffers"; + result = false; + } + } + // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN // Validate that Vulkan specific feature is only used when targeting SPIR-V @@ -15560,6 +15653,16 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { return false; } +// Verify that user-defined intrinsic struct args contain no long vectors +static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { + if (ContainsLongVector(Arg->getType())) { + S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) + << "user-defined struct parameter"; + return true; + } + return false; +} + static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 0); @@ -15577,6 +15680,12 @@ static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { auto attr = FDecl->getAttr(); + if (!attr) + return false; + + if (!IsBuiltinTable(attr->getGroup())) + return false; + switch (hlsl::IntrinsicOp(attr->getOpcode())) { case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want @@ -15588,6 +15697,22 @@ bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { // existing ones. See the ExtensionTest.EvalAttributeCollision test. 
assert(FDecl->getName() == "GetAttributeAtVertex"); return CheckIntrinsicGetAttributeAtVertex(this, FDecl, TheCall); + case hlsl::IntrinsicOp::IOP_DispatchMesh: + assert(TheCall->getNumArgs() > 3); + assert(FDecl->getName() == "DispatchMesh"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(3)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_CallShader: + assert(TheCall->getNumArgs() > 1); + assert(FDecl->getName() == "CallShader"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(1)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_TraceRay: + assert(TheCall->getNumArgs() > 7); + assert(FDecl->getName() == "TraceRay"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(7)->IgnoreCasts()); + case hlsl::IntrinsicOp::IOP_ReportHit: + assert(TheCall->getNumArgs() > 2); + assert(FDecl->getName() == "ReportHit"); + return CheckUDTIntrinsicArg(this, TheCall->getArg(2)->IgnoreCasts()); default: break; } @@ -16268,6 +16393,19 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { return; } + // Check general parameter characteristics + // Would be nice to check for resources here as they crash the compiler now. + // See issue #7186. 
+ for (const auto *param : FD->params()) { + if (ContainsLongVector(param->getType())) + S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "entry function parameters"; + } + + if (ContainsLongVector(FD->getReturnType())) + S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) + << "entry function return type"; + DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); llvm::StringRef StageName = shaderAttr->getStage(); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index cf5d741541..a11f72b306 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -520,6 +520,16 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << hullPatchCount.value(); } } + for (const auto *param : pPatchFnDecl->params()) + if (ContainsLongVector(param->getType())) + self->Diag(param->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "patch constant function parameters"; + + if (ContainsLongVector(pPatchFnDecl->getReturnType())) + self->Diag(pPatchFnDecl->getLocation(), + diag::err_hlsl_unsupported_long_vector) + << "patch constant function return type"; } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); diff --git a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index a6ae05faa5..1eacedbb0b 100644 --- a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2139,6 +2139,18 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, SourceLocation(), SourceLocation(), nullptr); CheckCompletedCXXClass(Instantiation); + // HLSL Change Begin - set longvec bit for vectors of over 4 elements + ClassTemplateSpecializationDecl *Spec = + dyn_cast(Instantiation); + if (Spec && Spec->hasAttr()) { + const TemplateArgumentList &argList = Spec->getTemplateArgs(); + const TemplateArgument &arg1 = argList[1]; + 
llvm::APSInt vecSize = arg1.getAsIntegral(); + if (vecSize.getLimitedValue() > hlsl::DXIL::kDefaultMaxVectorLength) + Instantiation->setHasHLSLLongVector(); + } + // HLSL Change End - set longvec bit for vectors of over 4 elements + // Default arguments are parsed, if not instantiated. We can go instantiate // default arg exprs for default constructors if necessary now. ActOnFinishCXXMemberDefaultArgs(Instantiation); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl new file mode 100644 index 0000000000..03735cb968 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-scalars.hlsl @@ -0,0 +1,162 @@ +// RUN: %dxc -DTYPE=float -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_6 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T vs_6_6 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// Confirm that 6.9 doesn't use vector loads for scalars and vec1s +// RUN: %dxc -DTYPE=float -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64 + +// RUN: %dxc -DTYPE=float1 -T vs_6_9 %s | FileCheck %s +// RUiN: %dxc -DTYPE=bool1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint64_t1 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double1 -T 
vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64 + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar buffer types and confirm that the proper +// loads, stores, and conversion operations take place. +/////////////////////////////////////////////////////////////////////// + + +// These -DAGs must match the same line. That is the only reason for the -DAG. +// The first match will assign [[TY]] to the native type +// For most runs, the second match will assign [[TY32]] to the same thing. +// For 64-bit types, the memory representation is i32 and a separate variable is needed. +// For these cases, there is another line that will always match i32. +// This line will also force the previous -DAGs to match the same line since the most +// This shader can produce is two ResRet types. +// CHECK-DAG: %dx.types.ResRet.[[TY:[a-z][0-9][0-9]]] = type { [[TYPE:[a-z0-9]*]], +// CHECK-DAG: %dx.types.ResRet.[[TY32:[a-z][0-9][0-9]]] = type { [[TYPE]], +// I64: %dx.types.ResRet.[[TY32:i32]] +// F64: %dx.types.ResRet.[[TY32:i32]] + + ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + + StructuredBuffer< TYPE > RoStBuf : register(t2); +RWStructuredBuffer< TYPE > RwStBuf : register(u2); + + Buffer< TYPE > RoTyBuf : register(t3); +RWBuffer< TYPE > RwTyBuf : register(u3); + +ConsumeStructuredBuffer CnStBuf : register(u4); +AppendStructuredBuffer ApStBuf : register(u5); + +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle 
@dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[TY]] 
@dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt4 = RoStBuf[ix[1]]; + + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne i32 %{{.*}}, 0 + TYPE cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); + + // TypedBuffer Tests + // CHECK: [[ANHDLRWTY:%.*]] = call 
%dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt1 = RwTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt2 = RwTyBuf[ix[1]]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt3 = RoTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt4 = RoTyBuf[ix[1]]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // I64: lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; +} diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl new file mode 100644 index 0000000000..8df8a36cd7 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -0,0 +1,139 @@ +// RUN: %dxc -DTYPE=float -DNUM=4 -T vs_6_9 %s | FileCheck %s +// RUN: %dxc -DTYPE=bool -DNUM=4 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I1 +// RUN: %dxc -DTYPE=uint8_t4_packed -DNUM=4 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK + +// 64-bit types require operation/intrinsic support to convert the values to/from the i32 memory representations. +// RUN: %dxc -DTYPE=uint64_t -DNUM=2 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,I64 +// RUN: %dxc -DTYPE=double -DNUM=2 -T vs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F64 + +/////////////////////////////////////////////////////////////////////// +// Test codegen for various load and store operations and conversions +// for different scalar/vector buffer types and indices. 
+/////////////////////////////////////////////////////////////////////// + +// CHECK-DAG: %dx.types.ResRet.[[VTY:v[0-9]*[a-z][0-9][0-9]]] = type { [[VTYPE:<[a-z 0-9]*>]], +// CHECK-DAG: %dx.types.ResRet.[[TY:[if][0-9][0-9]]] = type +// CHECK: %"class.StructuredBuffer + + ByteAddressBuffer RoByBuf : register(t1); +RWByteAddressBuffer RwByBuf : register(u1); + +StructuredBuffer< vector > RoStBuf : register(t2); +RWStructuredBuffer< vector > RwStBuf : register(u2); + + Buffer< vector > RoTyBuf : register(t3); +RWBuffer< vector > RwTyBuf : register(u3); + +ConsumeStructuredBuffer > CnStBuf : register(u4); +AppendStructuredBuffer > ApStBuf : register(u5); + +void main(uint ix[2] : IX) { + // ByteAddressBuffer Tests + + // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) + // CHECK-DAG: [[HDLRWBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 1 }, i32 1, i1 false) + + // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) + // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) + // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 
1 }, i32 5, i1 false) + + // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector babElt1 = RwByBuf.Load< vector >(ix[0]); + + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector babElt2 = RoByBuf.Load< vector >(ix[0]); + + // I1: zext <[[NUM]] x i1> %{{.*}} to [[VTYPE]] + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + + // StructuredBuffer Tests + // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector stbElt1 = RwStBuf.Load(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector stbElt2 = RwStBuf[ix[1]]; + + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call 
%dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector stbElt4 = RoStBuf[ix[1]]; + + // I1: zext <[[NUM]] x i1> %{{.*}} to [[VTYPE]] + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + + // {Append/Consume}StructuredBuffer Tests + // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] + // CHECK: [[CONIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLCON]], i8 -1) + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]] + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector cnElt = CnStBuf.Consume(); + + // CHECK: [[ANHDLAPP:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLAPP]] + // CHECK: [[APPIX:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[ANHDLAPP]], i8 1) + // I1: zext <[[NUM]] x i1> %{{.*}} to [[VTYPE]] + // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]] + ApStBuf.Append(cnElt); + + // TypedBuffer Tests + // CHECK: [[ANHDLRWTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector typElt1 = RwTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // 
I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector typElt2 = RwTyBuf[ix[1]]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector typElt3 = RoTyBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne [[VTYPE]] %{{.*}}, zeroinitializer + vector typElt4 = RoTyBuf[ix[1]]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext <[[NUM]] x i1> %{{.*}} to [[VTYPE]] + // CHECK: all void @dx.op.bufferStore.[[TY]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index ea44fef604..8dcf5ead1c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -27,13 +27,20 @@ RWByteAddressBuffer RwByBuf : register(u1); StructuredBuffer< TYPE > RoStBuf : register(t2); RWStructuredBuffer< TYPE > RwStBuf : register(u2); - Buffer< TYPE > RoTyBuf : register(t3); -RWBuffer< TYPE > RwTyBuf : register(u3); 
+ConsumeStructuredBuffer CnStBuf : register(u3); +AppendStructuredBuffer ApStBuf : register(u4); -ConsumeStructuredBuffer CnStBuf : register(u4); -AppendStructuredBuffer ApStBuf : register(u5); + Buffer< TYPE > RoTyBuf : register(t5); +RWBuffer< TYPE > RwTyBuf : register(u5); -void main(uint ix[2] : IX) { + Texture1D< TYPE > RoTex1d : register(t6); +RWTexture1D< TYPE > RwTex1d : register(u6); + Texture2D< TYPE > RoTex2d : register(t7); +RWTexture2D< TYPE > RwTex2d : register(u7); + Texture3D< TYPE > RoTex3d : register(t8); +RWTexture3D< TYPE > RwTex3d : register(u8); + +void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -42,13 +49,27 @@ void main(uint ix[2] : IX) { // CHECK-DAG: [[HDLROST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 0 }, i32 2, i1 false) // CHECK-DAG: [[HDLRWST:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 0, i8 1 }, i32 2, i1 false) - // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 0 }, i32 3, i1 false) - // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 3, i32 0, i8 1 }, i32 3, i1 false) + // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) + + // CHECK-DAG: [[HDLROTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 
0 }, i32 5, i1 false) + // CHECK-DAG: [[HDLRWTY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) - // CHECK-DAG: [[HDLCON:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 4, i32 4, i32 0, i8 1 }, i32 4, i1 false) - // CHECK-DAG: [[HDLAPP:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 0, i8 1 }, i32 5, i1 false) + // CHECK-DAG: [[HDLROTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 0 }, i32 6, i1 false) + // CHECK-DAG: [[HDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 6, i32 6, i32 0, i8 1 }, i32 6, i1 false) + // CHECK-DAG: [[HDLROTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 0 }, i32 7, i1 false) + // CHECK-DAG: [[HDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 0, i8 1 }, i32 7, i1 false) + // CHECK-DAG: [[HDLROTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 0 }, i32 8, i1 false) + // CHECK-DAG: [[HDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 0, i8 1 }, i32 8, i1 false) - // CHECK: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + + // CHECK-DAG: [[IX0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0 + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0 + // CHECK-DAG: [[IX20:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0 + // CHECK-DAG: [[IX21:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1 + // CHECK-DAG: [[IX30:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 0 + // CHECK-DAG: 
[[IX31:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 1 + // CHECK-DAG: [[IX32:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 2 // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] @@ -56,7 +77,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt1 = RwByBuf.Load< TYPE >(ix[0]); + TYPE babElt1 = RwByBuf.Load< TYPE >(ix0); // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] @@ -64,14 +85,14 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE babElt2 = RoByBuf.Load< TYPE >(ix[0]); + TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix[0], babElt1 + babElt2); + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -80,14 +101,13 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + TYPE stbElt1 = RwStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // 
I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt2 = RwStBuf[ix[1]]; + TYPE stbElt2 = RwStBuf[ix1]; // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] @@ -95,20 +115,20 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt3 = RoStBuf.Load(ix[0]); + TYPE stbElt3 = RoStBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE stbElt4 = RoStBuf[ix[1]]; + TYPE stbElt4 = RoStBuf[ix1]; // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -146,7 +166,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt1 = RwTyBuf.Load(ix[0]); + TYPE typElt1 = RwTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -162,7 +182,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt2 = RwTyBuf[ix[1]]; + TYPE typElt2 = RwTyBuf[ix1]; // 
CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -179,7 +199,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt3 = RoTyBuf.Load(ix[0]); + TYPE typElt3 = RoTyBuf.Load(ix0); // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -195,7 +215,7 @@ void main(uint ix[2] : IX) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 - TYPE typElt4 = RoTyBuf[ix[1]]; + TYPE typElt4 = RoTyBuf[ix1]; // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 @@ -210,5 +230,126 @@ void main(uint ix[2] : IX) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix[0]] = typElt1 + typElt2 + typElt3 + typElt4; + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + + // Texture Tests + // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX1]], i32 0, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 
%{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt2 = RwTex1d[ix0]; + + // CHECK: [[ANHDLROTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX2]], i32 0, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double 
@dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt4 = RwTex2d[ix2]; + + // CHECK: [[ANHDLROTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLROTX3]], i32 0, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE texElt6 = RwTex3d[ix3]; + + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // F64: call 
%dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I64: trunc i64 %{{.*}} to i32 + // lshr i64 %{{.*}}, 32 + // I64: trunc i64 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // I1: zext i1 %{{.*}} to i32 + // CHECK: call void @dx.op.textureStore.[[TY32]](i32 67, %dx.types.Handle [[ANHDLRWTX3]], i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] + RwTex3d[ix3] = texElt1 + texElt2 + texElt3 + texElt4 + texElt5 + texElt6; } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl new file mode 100644 index 0000000000..8bc7b9e73d --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-decls.hlsl @@ -0,0 +1,322 @@ +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s -check-prefixes=CHECK,F5 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=bool -DNUM=7 %s | FileCheck %s -check-prefixes=CHECK,B7 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s -check-prefixes=CHECK,L9 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=double -DNUM=17 %s | FileCheck %s -check-prefixes=CHECK,D17 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=float16_t -DNUM=256 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,H256 +// RUN: %dxc -fcgl -T lib_6_9 -DTYPE=int16_t -DNUM=1024 -enable-16bit-types %s | FileCheck %s -check-prefixes=CHECK,S1024 + +// A test to verify that declarations of longvecs are permitted in all the accepted places. +// Only tests for acceptance, most codegen is ignored for now. + +// CHECK: %struct.LongVec = type { <4 x float>, <[[NUM:[0-9]*]] x [[STY:[a-z0-9]*]]> } +struct LongVec { + float4 f; + vector vec; +}; + +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; + +// Just some dummies to capture the types and mangles. 
+// CHECK: @"\01?dummy@@3[[MNG:F|M|N|_N|_K|\$f16@]]A" = external addrspace(3) global [[STY]] +groupshared TYPE dummy; + +// Use the first groupshared to establish mangles and sizes +// F5-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:M]]$[[VS:04]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// B7-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_N]]$[[VS:06]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// L9-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:_K]]$[[VS:08]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// D17-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:N]]$[[VS:0BB@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// H256-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:\$f16@]]$[[VS:0BAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +// S1024-DAG: @"\01?gs_vec@@3V?$vector@[[MNG:F]]$[[VS:0EAA@]]@@A" = external addrspace(3) global <[[NUM]] x [[STY]]> +groupshared vector gs_vec; + +// CHECK-DAG: @"\01?gs_vec_arr@@3PAV?$vector@[[MNG]]$[[VS]]@@A" = external addrspace(3) global [10 x <[[NUM]] x [[STY]]>] +groupshared vector gs_vec_arr[10]; +// CHECK-DAG: @"\01?gs_vec_rec@@3ULongVec@@A" = external addrspace(3) global %struct.LongVec +groupshared LongVec gs_vec_rec; +// CHECK-DAG: @"\01?gs_vec_sub@@3ULongVecSub@@A" = external addrspace(3) global %struct.LongVecSub +groupshared LongVecSub gs_vec_sub; +// CHECK-DAG: @"\01?gs_vec_tpl@@3U?$LongVecTpl@$[[VS]]@@A" = external addrspace(3) global %"struct.LongVecTpl<[[NUM]]>" +groupshared LongVecTpl gs_vec_tpl; + +// CHECK-DAG: @static_vec = internal global <[[NUM]] x [[STY]]> +static vector static_vec; +// CHECK-DAG: @static_vec_arr = internal global [10 x <[[NUM]] x [[STY]]>] zeroinitializer +static vector static_vec_arr[10]; +// CHECK-DAG: @static_vec_rec = internal global %struct.LongVec +static LongVec static_vec_rec; +// CHECK-DAG: @static_vec_sub = internal global %struct.LongVecSub +static LongVecSub static_vec_sub; +// CHECK-DAG: @static_vec_tpl = internal global %"struct.LongVecTpl<[[NUM]]>" +static 
LongVecTpl static_vec_tpl; + +// CHECK: define [[RTY:[a-z0-9]*]] @"\01?getVal@@YA[[MNG]][[MNG]]@Z"([[RTY]] {{.*}}%t) +export TYPE getVal(TYPE t) {TYPE ret = dummy; dummy = t; return ret;} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_param_passthru +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@V1@@Z"(<[[NUM]] x [[RTY]]> %vec1) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_param_passthru(vector vec1) { + return vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_arr_passthru +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@Y09V1@@Z"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result, [10 x <[[NUM]] x [[STY]]>]* %vec) +// CHECK: ret void +export vector lv_param_arr_passthru(vector vec[10])[10] { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_rec_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_rec_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_sub_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_sub_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_tpl_passthru@@YA?AULongVec@@U1@@Z"(%struct.LongVec* noalias sret %agg.result, %struct.LongVec* %vec) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_param_tpl_passthru(LongVec vec) { + return vec; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@AIAV1@@Z"(<[[NUM]] x [[RTY]]> %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_in_out(in vector vec1, out vector vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void 
@"\01?lv_param_in_out_rec@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_rec(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_sub@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_sub(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + +// CHECK-LABEL: define void @"\01?lv_param_in_out_tpl@@YAXULongVec@@U1@@Z"(%struct.LongVec* %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_in_out_tpl(in LongVec vec1, out LongVec vec2) { + vec2 = vec1; +} + + +// CHECK-LABEL: define void @"\01?lv_param_inout +// CHECK-SAME: @@YAXAIAV?$vector@[[MNG]]$[[VS]]@@0@Z"(<[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec1, <[[NUM]] x [[STY]]>* noalias dereferenceable({{[0-9]*}}) %vec2) +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec1, align 4 +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* %vec2, align 4 +// CHECK: ret void +export void lv_param_inout(inout vector vec1, inout vector vec2) { + vector tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_rec@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_rec(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_sub@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_sub(inout LongVec vec1, inout LongVec vec2) { + 
LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_param_inout_tpl@@YAXULongVec@@0@Z"(%struct.LongVec* noalias %vec1, %struct.LongVec* noalias %vec2) +// CHECK: memcpy +// CHECK: ret void +export void lv_param_inout_tpl(inout LongVec vec1, inout LongVec vec2) { + LongVec tmp = vec1; + vec1 = vec2; + vec2 = tmp; +} + +// CHECK-LABEL: define void @"\01?lv_global_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret void +export void lv_global_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + static_vec = vec; + static_vec_arr = arr; + static_vec_rec = rec; + static_vec_sub = sub; + static_vec_tpl = tpl; +} + +// CHECK-LABEL: define void @"\01?lv_gs_assign +// CHECK-SAME: @@YAXV?$vector@[[MNG]]$[[VS]]@@Y09V1@ULongVec@@ULongVecSub@@U?$LongVecTpl@$[[VS]]@@@Z"(<[[NUM]] x [[RTY]]> %vec, [10 x <[[NUM]] x [[STY]]>]* %arr, %struct.LongVec* %rec, %struct.LongVecSub* %sub, %"struct.LongVecTpl<[[NUM]]>"* %tpl) +// CHECK: store <[[NUM]] x [[STY]]> {{%.*}}, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret void +export void lv_gs_assign(vector vec, vector arr[10], + LongVec rec, LongVecSub sub, LongVecTpl tpl) { + gs_vec = vec; + gs_vec_arr = arr; + gs_vec_rec = sub; + gs_vec_tpl = tpl; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_global_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]>* @static_vec +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_global_ret() { + return static_vec; +} + +// CHECK-LABEL: define void @"\01?lv_global_arr_ret +// CHECK-SAME: 
@@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_global_arr_ret()[10] { + return static_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_global_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_global_rec_ret() { + return static_vec_rec; +} + +// CHECK-LABEL: define void @"\01?lv_global_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecSub lv_global_sub_ret() { + return static_vec_sub; +} + +// CHECK-LABEL: define void @"\01?lv_global_tpl_ret +// CHECK-SAME: @@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecTpl lv_global_tpl_ret() { + return static_vec_tpl; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_gs_ret +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@XZ"() +// CHECK: load <[[NUM]] x [[STY]]>, <[[NUM]] x [[STY]]> addrspace(3)* @"\01?gs_vec@@3V?$vector@[[MNG]]$[[VS]]@@A" +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_gs_ret() { + return gs_vec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_arr_ret +// CHECK-SAME: @@YA$$BY09V?$vector@[[MNG]]$[[VS]]@@XZ"([10 x <[[NUM]] x [[STY]]>]* noalias sret %agg.result) +// CHECK: ret void +export vector lv_gs_arr_ret()[10] { + return gs_vec_arr; +} + +// CHECK-LABEL: define void @"\01?lv_gs_rec_ret@@YA?AULongVec@@XZ"(%struct.LongVec* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVec lv_gs_rec_ret() { + return gs_vec_rec; +} + +// CHECK-LABEL: define void @"\01?lv_gs_sub_ret@@YA?AULongVecSub@@XZ"(%struct.LongVecSub* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecSub lv_gs_sub_ret() { + return gs_vec_sub; +} + +// CHECK-LABEL: define void @"\01?lv_gs_tpl_ret +// CHECK-SAME: 
@@YA?AU?$LongVecTpl@$[[VS]]@@XZ"(%"struct.LongVecTpl<[[NUM]]>"* noalias sret %agg.result) +// CHECK: memcpy +// CHECK: ret void +export LongVecTpl lv_gs_tpl_ret() { + return gs_vec_tpl; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_splat +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@[[MNG]]@Z"([[RTY]] {{.*}}%scalar) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_splat(TYPE scalar) { + vector ret = scalar; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@XZ"() +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist() { + vector ret = {1, 2, 3, 4, 5, 6}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_initlist_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@@Z"(<3 x [[RTY]]> %vec) +// CHECK: ret <6 x [[RTY]]> +export vector lv_initlist_vec(vector vec) { + vector ret = {vec, 4.0, 5.0, 6.0}; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_vec_vec +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@V?$vector@[[MNG]]$02@@0@Z"(<3 x [[RTY]]> %vec1, <3 x [[RTY]]> %vec2) +// CHECK: ret <6 x [[RTY]]> +export vector lv_vec_vec(vector vec1, vector vec2) { + vector ret = {vec1, vec2}; + return ret; +} + +// CHECK: define <[[NUM]] x [[RTY]]> +// CHECK-LABEL: @"\01?lv_array_cast +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$[[VS]]@@Y[[VS]][[MNG]]@Z"({{\[}}[[NUM]] x [[STY]]]* %arr) +// CHECK: ret <[[NUM]] x [[RTY]]> +export vector lv_array_cast(TYPE arr[NUM]) { + vector ret = (vector)arr; + return ret; +} + +// CHECK: define <6 x [[RTY]]> +// CHECK-LABEL: @"\01?lv_ctor +// CHECK-SAME: @@YA?AV?$vector@[[MNG]]$05@@[[MNG]]@Z"([[RTY]] {{.*}}%s) +// CHECK: ret <6 x [[RTY]]> +export vector lv_ctor(TYPE s) { + vector ret = vector(1.0, 2.0, 3.0, 4.0, 5.0, s); + return ret; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl 
b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl new file mode 100644 index 0000000000..bb2cae6756 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-bool.hlsl @@ -0,0 +1,463 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=2 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=5 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=3 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DNUM=9 %s | FileCheck %s + +// Test relevant operators on an assortment of bool vector sizes with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses non vector buffer to avoid interacting with that implementation. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] +RWStructuredBuffer< bool > buf; + +groupshared vector gs_vec1, gs_vec2; +groupshared vector gs_vec3; + + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], bool scales[10]) { + + // Another trick to capture the size. 
+ // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.i32 [[res]], 0 + // CHECK: [[bscl:%[0-9]*]] = icmp ne i32 [[scl]], 0 + bool scalar = buf.Load(NUM); + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add9]] + // CHECK: [[bvec9:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec9]], zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec9]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + things[0] = things[9]; + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i1> undef, i1 [[bscl]], i32 0 + // CHECK: [[res:%[0-9]*]] = shufflevector <[[NUM]] x i1> [[spt]], <[[NUM]] x i1> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[res]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + things[5] = scalar; + +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[svec0:%[0-9]*]] = sext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[bsvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[svec0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bsvec0]] to <[[NUM]] x i32> + res[0] = -things[0]; + + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: 
[[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + res[1] = +things[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[res2:%[0-9]*]] = add nuw nsw <[[NUM]] x i32> [[vec2]], [[vec1]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[1] + things[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = sub nsw <[[NUM]] x i32> [[vec2]], [[vec3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[2] - things[3]; + + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = mul nuw nsw <[[NUM]] x i32> [[vec4]], [[vec3]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = things[3] * things[4]; + + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec4]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: 
[[res5:%[0-9][0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = things[4] / things[5]; + + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = {{[ufs]?rem( fast)?}} <[[NUM]] x i32> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; + + // Stores into res[]. Previous were for things[] inout. + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
+// CHECK-LABEL: define void @"\01?scarithmetic +export vector<bool, NUM> scarithmetic(inout vector<bool, NUM> things[10], bool scales[10])[10] { + vector<bool, NUM> res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[vec0:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec0]] to <[[NUM]] x i32> + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load i32, i32* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl0]], i32 0 + // CHECK: [[spt0:%[0-9]*]] = 
shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = add <[[NUM]] x i32> [[spt0]], [[vec0]] + // CHECK: [[bres0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res0]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[bres0]] to <[[NUM]] x i32> + res[0] = things[0] + scales[0]; + + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[vec1:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec1]] to <[[NUM]] x i32> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load i32, i32* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = sub <[[NUM]] x i32> [[vec1]], [[spt1]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = things[1] - scales[1]; + + + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load i32, i32* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt2]], [[vec2]] + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = things[2] * scales[2]; + + // CHECK: [[bvec3:%[0-9]*]] = icmp 
ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load i32, i32* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = sdiv <[[NUM]] x i32> [[vec3]], [[spt3]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load i32, i32* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[res4:%[0-9]*]] = add <[[NUM]] x i32> [[spt4]], [[vec4]] + // CHECK: [[bres4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res4]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load i32, i32* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], 
zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res5:%[0-9]*]] = sub <[[NUM]] x i32> [[spt5]], [[vec5]] + // CHECK: [[bres5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res5]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x i32], [10 x i32]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load i32, i32* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x i32> undef, i32 [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x i32> [[spt]], <[[NUM]] x i32> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[vec6:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec6]] to <[[NUM]] x i32> + // CHECK: [[res6:%[0-9]*]] = mul nuw <[[NUM]] x i32> [[spt6]], [[vec6]] + // CHECK: [[bres6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res6]], zeroinitializer + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x i32> [[res1]], <[[NUM]] x i32>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> [[res2]], <[[NUM]] x i32>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x i32> [[res3]], <[[NUM]] x i32>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = 
getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators. +// Only permissible in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector<bool, NUM> logic(vector<bool, NUM> truth[10], vector<bool, NUM> consequences[10])[10] { + vector<bool, NUM> res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = 
truth[1] || truth[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // MORE STUFF + + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bvec0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[bvec0]], [[bvec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = icmp {{u?}}ne <[[NUM]] x i1> [[bvec1]], [[bvec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = icmp {{[osu]?}}lt <[[NUM]] x i1> [[bvec2]], [[bvec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x 
i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres7:%[0-9]*]] = icmp {{[osu]?}}gt <[[NUM]] x i1> [[bvec3]], [[bvec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[bres7]] to <[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[bres8:%[0-9]*]] = icmp {{[osu]?}}le <[[NUM]] x i1> [[bvec4]], [[bvec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[bres8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres9:%[0-9]*]] = icmp {{[osu]?}}ge <[[NUM]] x i1> [[bvec5]], [[bvec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[bres9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x 
<[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector<bool, NUM> index(vector<bool, NUM> things[10], int i, bool val)[10] { + vector<bool, NUM> res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x i32>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> zeroinitializer, <[[NUM]] x i32>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x i32> <i32 1{{.*}}>, <[[NUM]] x i32>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x i32> <i32 1{{.*}}>, <[[NUM]] x i32>* [[res2]] + res[Ix] = true; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[bthg0:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg0]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x 
i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 3 + // CHECK: [[thg0:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg0]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg0]], <[[NUM]] x i32>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 %i + // CHECK: [[thgi:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[addi]] + // CHECK: [[bthgi:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thgi]], zeroinitializer + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 4 + // CHECK: [[thgi:%[0-9]*]] = zext <[[NUM]] x i1> [[bthgi]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thgi]], <[[NUM]] x i32>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bthg2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[thg2]], zeroinitializer + // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* [[res]], i32 0, i32 5 + // CHECK: [[thg2:%[0-9]*]] = zext <[[NUM]] x i1> [[bthg2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x i32> [[thg2]], <[[NUM]] x i32>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; + +} + +// Test bit twiddling operators. 
+// CHECK-LABEL: define void @"\01?bittwiddlers +export void bittwiddlers(inout vector<bool, NUM> things[10]) { + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[vec2:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec2]] to <[[NUM]] x i32> + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[vec3:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec3]] to <[[NUM]] x i32> + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: [[bres1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res1]], zeroinitializer + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %things, i32 0, i32 1 + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] = things[2] | things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec4]], [[bvec3]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: [[bres2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res2]], zeroinitializer + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x 
[[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] = things[3] & things[4]; + + // CHECK: [[vec4:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec4]] to <[[NUM]] x i32> + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + // CHECK: [[vec5:%[0-9]*]] = zext <[[NUM]] x i1> [[bvec5]] to <[[NUM]] x i32> + // CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[bres3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[res3]], zeroinitializer + // CHECK: [[res3:%[0-9]*]] = zext <[[NUM]] x i1> [[bres3]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[bvec6:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec6]], zeroinitializer + // CHECK: [[bres4:%[0-9]*]] = or <[[NUM]] x i1> [[bvec6]], [[bvec4]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[bres4]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] |= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[bvec7:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec7]], zeroinitializer + // CHECK: [[bres5:%[0-9]*]] = and <[[NUM]] x i1> [[bvec7]], [[bvec5]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[bres5]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x 
[[TYPE]]>* [[add5]] + things[5] &= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[bvec8:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec8]], zeroinitializer + // CHECK: [[bres6:%[0-9]*]] = xor <[[NUM]] x i1> [[bvec6]], [[bvec8]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[bres6]] to <[[NUM]] x i32> + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] ^= things[8]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl new file mode 100644 index 0000000000..8c07f40af7 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators-int.hlsl @@ -0,0 +1,58 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint -DNUM=5 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int64_t -DNUM=3 %s | FileCheck %s +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint16_t -DNUM=9 -enable-16bit-types %s | FileCheck %s + +// Test bitwise operators on an assortment of vector sizes and integer types with 6.9 native vectors. + +// Test bit twiddling operators. 
+// CHECK-LABEL: define void @"\01?bittwiddlers +// CHECK-SAME: ([10 x <[[NUM:[0-9][0-9]*]] x [[TYPE:[a-z0-9]*]]>]* +export void bittwiddlers(inout vector<TYPE, NUM> things[10]) { + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[res1:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec1]], <[[TYPE]] -1, + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add0]] + things[0] = ~things[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[res1:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec3]], [[vec2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] = things[2] | things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[res2:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] = things[3] & things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // 
CHECK: [[res3:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] = things[4] ^ things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[res4:%[0-9]*]] = or <[[NUM]] x [[TYPE]]> [[vec6]], [[vec4]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] |= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[res5:%[0-9]*]] = and <[[NUM]] x [[TYPE]]> [[vec7]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + things[5] &= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[res6:%[0-9]*]] = xor <[[NUM]] x [[TYPE]]> [[vec6]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] ^= things[8]; + + // CHECK: ret void +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl new file mode 100644 index 0000000000..b3285267c9 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-operators.hlsl @@ -0,0 +1,492 @@ +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=3 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=4 %s | FileCheck %s 
--check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=6 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=7 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=8 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=10 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=11 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=12 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=13 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=14 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=15 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=16 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=17 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=18 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float -DNUM=128 %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Less exhaustive testing for some other types. 
+// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int -DNUM=2 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint8_t4_packed -DNUM=4 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint -DNUM=5 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=double -DNUM=3 -DDBL %s | FileCheck %s --check-prefixes=CHECK,DBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=uint64_t -DNUM=9 %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=float16_t -DNUM=17 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL +// RUN: %dxc -HV 2018 -T lib_6_9 -DTYPE=int16_t -DNUM=177 -enable-16bit-types %s | FileCheck %s --check-prefixes=CHECK,NODBL + +// Test relevant operators on an assortment of vector sizes and types with 6.9 native vectors. + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. +// Uses a non-vector buffer to avoid interacting with that implementation. +// CHECK: %dx.types.ResRet.[[TY:[a-z0-9]*]] = type { [[TYPE:[a-z_0-9]*]] + +RWStructuredBuffer< TYPE > buf; + +export void assignments(inout vector things[10], TYPE scales[10]); +export vector arithmetic(inout vector things[11])[11]; +export vector scarithmetic(inout vector things[10], TYPE scales[10])[10]; +export vector logic(vector truth[10], vector consequences[10])[10]; +export vector index(vector things[10], int i, TYPE val)[10]; + +struct Interface { + vector assigned[10]; + vector arithmeticked[11]; + vector scarithmeticked[10]; + vector logicked[10]; + vector indexed[10]; + TYPE scales[10]; +}; + +#if 0 +// Requires vector loading support. Enable when available. 
+RWStructuredBuffer Input; +RWStructuredBuffer Output; + +TYPE g_val; + +[shader("compute")] +[numthreads(8,1,1)] +void main(uint GI : SV_GroupIndex) { + assignments(Output[GI].assigned, Input[GI].scales); + Output[GI].arithmeticked = arithmetic(Input[GI].arithmeticked); + Output[GI].scarithmeticked = scarithmetic(Input[GI].scarithmeticked, Input[GI].scales); + Output[GI].logicked = logic(Input[GI].logicked, Input[GI].assigned); + Output[GI].indexed = index(Input[GI].indexed, GI, g_val); +} +#endif + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +// Test assignment operators. +// CHECK-LABEL: define void @"\01?assignments +export void assignments(inout vector things[10], TYPE scales[10]) { + + // Another trick to capture the size. + // CHECK: [[res:%[0-9]*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle %{{[^,]*}}, i32 [[NUM:[0-9]*]] + // CHECK: [[scl:%[0-9]*]] = extractvalue %dx.types.ResRet.[[TY]] [[res]], 0 + TYPE scalar = buf.Load(NUM); + + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl]], i32 0 + // CHECK: [[res0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + things[0] = scalar; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[res1:%[0-9]*]] = 
[[ADD:f?add( fast)?]] <[[NUM]] x [[TYPE]]> [[vec1]], [[vec5]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + things[1] += things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res2:%[0-9]*]] = [[SUB:f?sub( fast)?]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + things[2] -= things[6]; + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[res3:%[0-9]*]] = [[MUL:f?mul( fast)?]] <[[NUM]] x [[TYPE]]> [[vec3]], [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + things[3] *= things[7]; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[res4:%[0-9]*]] = [[DIV:[ufs]?div( fast)?]] <[[NUM]] x [[TYPE]]> [[vec4]], 
[[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + things[4] /= things[8]; + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]] +#ifdef DBL + // DBL: [[fvec9:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec9]] to <[[NUM]] x float> + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // DBL: [[fres5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x float> [[fvec5]], [[fvec9]] + // DBL: [[res5:%[0-9]*]] = fpext <[[NUM]] x float> [[fres5]] to <[[NUM]] x double> + vector f9 = things[9]; + vector f5 = things[5]; + f5 %= f9; + things[5] = f5; +#else + // NODBL: [[res5:%[0-9]*]] = [[REM:[ufs]?rem( fast)?]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec9]] + things[5] %= things[9]; +#endif + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt1]], [[vec6]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + things[6] += scales[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> 
zeroinitializer + // CHECK: [[res7:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec7]], [[spt2]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]] + things[7] -= scales[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res8:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt3]], [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]] + things[8] *= scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res9:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec9]], [[spt4]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + things[9] /= scales[4]; + +} + +// Test arithmetic operators. 
+// CHECK-LABEL: define void @"\01?arithmetic +export vector arithmetic(inout vector things[11])[11] { + vector res[11]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[res1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[res0:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + res[0] = -things[0]; + res[1] = +things[0]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res2:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec1]] + res[2] = things[1] + things[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[res3:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + res[3] = things[2] - things[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[res4:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec3]] + res[4] = things[3] * things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[res5:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + res[5] 
= things[4] / things[5]; + + // DBL: [[fvec5:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec5]] to <[[NUM]] x float> + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] +#ifdef DBL + // DBL: [[fvec6:%[0-9]*]] = fptrunc <[[NUM]] x double> [[vec6]] to <[[NUM]] x float> + // DBL: [[fres6:%[0-9]*]] = [[REM]] <[[NUM]] x float> [[fvec5]], [[fvec6]] + // DBL: [[res6:%[0-9]*]] = fpext <[[NUM]] x float> [[fres6]] to <[[NUM]] x double> + res[6] = (vector)things[5] % (vector)things[6]; +#else + // NODBL: [[res6:%[0-9]*]] = [[REM]] <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + res[6] = things[5] % things[6]; +#endif + + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 7 + // CHECK: [[vec7:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[res7:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec7]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res7]], <[[NUM]] x [[TYPE]]>* [[add7]] + res[7] = things[7]++; + + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 8 + // CHECK: [[vec8:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[res8:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec8]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res8]], <[[NUM]] x [[TYPE]]>* [[add8]] + res[8] = things[8]--; + + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 9 + // CHECK: [[vec9:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[res9:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec9]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + res[9] = ++things[9]; + + // CHECK: 
[[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 10 + // CHECK: [[vec10:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: [[res10:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[vec10]] + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + res[10] = --things[10]; + + // Stores into res[]. Previous were for things[] inout. + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // These two were 
post ops, so the original value goes into res[]. + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec7]], <[[NUM]] x [[TYPE]]>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x [[TYPE]]> [[vec8]], <[[NUM]] x [[TYPE]]>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res9]], <[[NUM]] x [[TYPE]]>* [[add9]] + // CHECK: [[add10:%[0-9]*]] = getelementptr inbounds [11 x <[[NUM]] x [[TYPE]]>], [11 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 10 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res10]], <[[NUM]] x [[TYPE]]>* [[add10]] + // CHECK: ret void + + + return res; +} + +// Test arithmetic operators with scalars. 
+// CHECK-LABEL: define void @"\01?scarithmetic +export vector scarithmetic(inout vector things[10], TYPE scales[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 0 + // CHECK: [[scl0:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add0]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl0]], i32 0 + // CHECK: 
[[spt0:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res0:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt0]], [[vec0]] + res[0] = things[0] + scales[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 1 + // CHECK: [[scl1:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add1]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl1]], i32 0 + // CHECK: [[spt1:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res1:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[vec1]], [[spt1]] + res[1] = things[1] - scales[1]; + + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 2 + // CHECK: [[scl2:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add2]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl2]], i32 0 + // CHECK: [[spt2:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res2:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt2]], [[vec2]] + res[2] = things[2] * scales[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 3 + // CHECK: [[scl3:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add3]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl3]], i32 0 + // CHECK: [[spt3:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res3:%[0-9]*]] = [[DIV]] <[[NUM]] x [[TYPE]]> [[vec3]], [[spt3]] + res[3] = things[3] / scales[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 4 + // CHECK: [[scl4:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add4]] + // CHECK: 
[[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl4]], i32 0 + // CHECK: [[spt4:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res4:%[0-9]*]] = [[ADD]] <[[NUM]] x [[TYPE]]> [[spt4]], [[vec4]] + res[4] = scales[4] + things[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 5 + // CHECK: [[scl5:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add5]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl5]], i32 0 + // CHECK: [[spt5:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res5:%[0-9]*]] = [[SUB]] <[[NUM]] x [[TYPE]]> [[spt5]], [[vec5]] + res[5] = scales[5] - things[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x [[TYPE]]], [10 x [[TYPE]]]* %scales, i32 0, i32 6 + // CHECK: [[scl6:%[0-9]*]] = load [[TYPE]], [[TYPE]]* [[add6]] + // CHECK: [[spt:%[0-9]*]] = insertelement <[[NUM]] x [[TYPE]]> undef, [[TYPE]] [[scl6]], i32 0 + // CHECK: [[spt6:%[0-9]*]] = shufflevector <[[NUM]] x [[TYPE]]> [[spt]], <[[NUM]] x [[TYPE]]> undef, <[[NUM]] x i32> zeroinitializer + // CHECK: [[res6:%[0-9]*]] = [[MUL]] <[[NUM]] x [[TYPE]]> [[spt6]], [[vec6]] + res[6] = scales[6] * things[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res0]], <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 1 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res1]], <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> 
[[res2]], <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res3]], <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res4]], <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res5]], <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x [[TYPE]]> [[res6]], <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: ret void + + + return res; +} + +// Test logic operators (!, ||, &&, ?:) and comparisons (==, !=, <, >, <=, >=). +// Only permissible in pre-HLSL2021 +// CHECK-LABEL: define void @"\01?logic +export vector logic(vector truth[10], vector consequences[10])[10] { + vector res[10]; + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add0]] + // CHECK: [[cmp:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec0]], zeroinitializer + // CHECK: [[cmp0:%[0-9]*]] = icmp eq <[[NUM]] x i1> [[cmp]], zeroinitializer + // CHECK: [[res0:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp0]] to <[[NUM]] x i32> + res[0] = !truth[0]; + + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add1]] + // CHECK: [[bvec1:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec1]], zeroinitializer + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* 
%truth, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add2]] + // CHECK: [[bvec2:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec2]], zeroinitializer + // CHECK: [[bres1:%[0-9]*]] = or <[[NUM]] x i1> [[bvec2]], [[bvec1]] + // CHECK: [[res1:%[0-9]*]] = zext <[[NUM]] x i1> [[bres1]] to <[[NUM]] x i32> + res[1] = truth[1] || truth[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add3]] + // CHECK: [[bvec3:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec3]], zeroinitializer + // CHECK: [[bres2:%[0-9]*]] = and <[[NUM]] x i1> [[bvec3]], [[bvec2]] + // CHECK: [[res2:%[0-9]*]] = zext <[[NUM]] x i1> [[bres2]] to <[[NUM]] x i32> + res[2] = truth[2] && truth[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add4]] + // CHECK: [[bvec4:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec4]], zeroinitializer + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %truth, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x i32>, <[[NUM]] x i32>* [[add5]] + // CHECK: [[bvec5:%[0-9]*]] = icmp ne <[[NUM]] x i32> [[vec5]], zeroinitializer + + // TODO: verify the select below. Select is still extracting everything, which slows WAY down with over 100 elements + + res[3] = truth[3] ? 
truth[4] : truth[5]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 0 + // CHECK: [[vec0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[add1:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 1 + // CHECK: [[vec1:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add1]] + // CHECK: [[cmp4:%[0-9]*]] = [[CMP:[fi]?cmp( fast)?]] {{o?}}eq <[[NUM]] x [[TYPE]]> [[vec0]], [[vec1]] + // CHECK: [[res4:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp4]] to <[[NUM]] x i32> + res[4] = consequences[0] == consequences[1]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 2 + // CHECK: [[vec2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[cmp5:%[0-9]*]] = [[CMP]] {{u?}}ne <[[NUM]] x [[TYPE]]> [[vec1]], [[vec2]] + // CHECK: [[res5:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp5]] to <[[NUM]] x i32> + res[5] = consequences[1] != consequences[2]; + + // CHECK: [[add3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 3 + // CHECK: [[vec3:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add3]] + // CHECK: [[cmp6:%[0-9]*]] = [[CMP]] {{[osu]?}}lt <[[NUM]] x [[TYPE]]> [[vec2]], [[vec3]] + // CHECK: [[res6:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp6]] to <[[NUM]] x i32> + res[6] = consequences[2] < consequences[3]; + + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 4 + // CHECK: [[vec4:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add4]] + // CHECK: [[cmp7:%[0-9]*]] = [[CMP]] {{[osu]?}}gt <[[NUM]] x [[TYPE]]> [[vec3]], [[vec4]] + // CHECK: [[res7:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp7]] to 
<[[NUM]] x i32> + res[7] = consequences[3] > consequences[4]; + + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 5 + // CHECK: [[vec5:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add5]] + // CHECK: [[cmp8:%[0-9]*]] = [[CMP]] {{[osu]?}}le <[[NUM]] x [[TYPE]]> [[vec4]], [[vec5]] + // CHECK: [[res8:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp8]] to <[[NUM]] x i32> + res[8] = consequences[4] <= consequences[5]; + + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %consequences, i32 0, i32 6 + // CHECK: [[vec6:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add6]] + // CHECK: [[cmp9:%[0-9]*]] = [[CMP]] {{[osu]?}}ge <[[NUM]] x [[TYPE]]> [[vec5]], [[vec6]] + // CHECK: [[res9:%[0-9]*]] = zext <[[NUM]] x i1> [[cmp9]] to <[[NUM]] x i32> + res[9] = consequences[5] >= consequences[6]; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 0 + // CHECK: store <[[NUM]] x i32> [[res0]], <[[NUM]] x i32>* [[add0]] + // CHECK: [[add4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 4 + // CHECK: store <[[NUM]] x i32> [[res4]], <[[NUM]] x i32>* [[add4]] + // CHECK: [[add5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 5 + // CHECK: store <[[NUM]] x i32> [[res5]], <[[NUM]] x i32>* [[add5]] + // CHECK: [[add6:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 6 + // CHECK: store <[[NUM]] x i32> [[res6]], <[[NUM]] x i32>* [[add6]] + // CHECK: [[add7:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 7 + // CHECK: store <[[NUM]] x i32> [[res7]], <[[NUM]] x i32>* [[add7]] + // CHECK: [[add8:%[0-9]*]] = getelementptr 
inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 8 + // CHECK: store <[[NUM]] x i32> [[res8]], <[[NUM]] x i32>* [[add8]] + // CHECK: [[add9:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x i32>], [10 x <[[NUM]] x i32>]* %agg.result, i32 0, i32 9 + // CHECK: store <[[NUM]] x i32> [[res9]], <[[NUM]] x i32>* [[add9]] + // CHECK: ret void + + return res; +} + +static const int Ix = 2; + +// Test indexing operators +// CHECK-LABEL: define void @"\01?index +export vector index(vector things[10], int i, TYPE val)[10] { + vector res[10]; + + // CHECK: [[res:%[0-9]*]] = alloca [10 x <[[NUM]] x [[TYPE]]>] + // CHECK: [[res0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 0 + // CHECK: store <[[NUM]] x [[TYPE]]> zeroinitializer, <[[NUM]] x [[TYPE]]>* [[res0]] + res[0] = 0; + + // CHECK: [[resi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 %i + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(1|0xH3C00).*}}>, <[[NUM]] x [[TYPE]]>* [[resi]] + res[i] = 1; + + // CHECK: [[res2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 2 + // CHECK: store <[[NUM]] x [[TYPE]]> <[[TYPE]] {{(2|0xH4000).*}}>, <[[NUM]] x [[TYPE]]>* [[res2]] + res[Ix] = 2; + + // CHECK: [[add0:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 0 + // CHECK: [[thg0:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add0]] + // CHECK: [[res3:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 3 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg0]], <[[NUM]] x [[TYPE]]>* [[res3]] + res[3] = things[0]; + + // CHECK: [[addi:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 %i + // CHECK: 
[[thgi:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[addi]] + // CHECK: [[res4:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 4 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thgi]], <[[NUM]] x [[TYPE]]>* [[res4]] + res[4] = things[i]; + + // CHECK: [[add2:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* %things, i32 0, i32 2 + // CHECK: [[thg2:%[0-9]*]] = load <[[NUM]] x [[TYPE]]>, <[[NUM]] x [[TYPE]]>* [[add2]] + // CHECK: [[res5:%[0-9]*]] = getelementptr inbounds [10 x <[[NUM]] x [[TYPE]]>], [10 x <[[NUM]] x [[TYPE]]>]* [[res]], i32 0, i32 5 + // CHECK: store <[[NUM]] x [[TYPE]]> [[thg2]], <[[NUM]] x [[TYPE]]>* [[res5]] + res[5] = things[Ix]; + // CHECK: ret void + return res; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl new file mode 100644 index 0000000000..1910e08a25 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl @@ -0,0 +1,154 @@ +// RUN: %dxc -Wno-conversion -T cs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F32 +// RUN: %dxc -Wno-conversion -T cs_6_9 -DF64 %s | FileCheck %s --check-prefixes=CHECK,F64 + +RWByteAddressBuffer buf; + +// "TYPE" is the mainly focused test type. +// "UNTYPE" is the other type used for mixed precision testing. +#ifdef F64 +typedef double TYPE; +typedef float UNTYPE; +#else +typedef float TYPE; +typedef double UNTYPE; +#endif + +// Two main test function overloads. One expects matching element types. +// The other uses different types to test ops and overload resolution. +template vector dostuff(vector thing1, vector thing2, vector thing3); +vector dostuff(vector thing1, vector thing2, vector thing3); + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
+// F32-DAG: %dx.types.ResRet.[[TY:v8f32]] = type { [[TYPE:<8 x float>]] +// F32-DAG: %dx.types.ResRet.[[UNTY:v8f64]] = type { [[UNTYPE:<8 x double>]] +// F64-DAG: %dx.types.ResRet.[[TY:v8f64]] = type { [[TYPE:<8 x double>]] +// F64-DAG: %dx.types.ResRet.[[UNTY:v8f32]] = type { [[UNTYPE:<8 x float>]] + +// Verify that groupshared vectors are kept as aggregates +// CHECK: @"\01?gs_vec1@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]] +// CHECK: @"\01?gs_vec2@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]] +// CHECK: @"\01?gs_vec3@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]] +groupshared vector gs_vec1, gs_vec2, gs_vec3; + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + // CHECK: [[vec1_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK-DAG: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0 + // F32-DAG: [[vec1_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0 + // F64-DAG: [[vec1_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0 + vector vec1 = buf.Load >(0); + + // CHECK: [[vec2_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 60 + // CHECK-DAG: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0 + // F32-DAG: [[vec2_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0 + // F64-DAG: [[vec2_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0 + vector vec2 = buf.Load >(60); + + // CHECK: [[vec3_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 120 + // CHECK-DAG: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 
0 + // F64-DAG: [[vec3_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 0 + vector vec3 = buf.Load >(120); + + // CHECK: [[unvec_res:%.*]] = call %dx.types.ResRet.[[UNTY]] @dx.op.rawBufferVectorLoad.[[UNTY]](i32 303, %dx.types.Handle [[buf]], i32 180 + // CHECK-DAG: [[unvec:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0 + // F32-DAG: [[unvec_64:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0 + // F64-DAG: [[unvec_32:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0 + vector unvec = buf.Load >(180); + + vec1 = dostuff(vec1, vec2, vec3); + + // Test mixed type operations + vec2 = dostuff(vec2, unvec, vec3); + + gs_vec2 = dostuff(gs_vec1, gs_vec2, gs_vec3); + + // mix groupshared and non + //vec1 = dostuff(vec1, gs_vec2, vec3); + + buf.Store >(240, vec1 * vec2 - vec3 * gs_vec1 + gs_vec2 / gs_vec3); +} + +// Test the required ops on long vectors and confirm correct lowering. +template +vector dostuff(vector thing1, vector thing2, vector thing3) { + vector res = 0; + + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]]) ; FMin(a,b) + res += min(thing1, thing2); + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec3]]) ; FMax(a,b) + res += max(thing1, thing3); + + // CHECK: [[tmp:%.*]] = call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]]) ; FMax(a,b) + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[tmp]], [[TYPE]] [[vec3]]) ; FMin(a,b) + res += clamp(thing1, thing2, thing3); + + // F32: [[vec3_64:%.*]] = fpext <8 x float> [[vec3]] to <8 x double> + // F32: [[vec2_64:%.*]] = fpext <8 x float> [[vec2]] to <8 x double> + // F32: [[vec1_64:%.*]] = fpext <8 x float> [[vec1]] to <8 x double> + // CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec1_64]], <8 x double> [[vec2_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c) + res += (vector)fma((vector)thing1, (vector)(thing2), 
(vector)thing3); + + // Even in the double test, these will be downconverted because these builtins only take floats. + // F64: [[vec2_32:%.*]] = fptrunc <8 x double> [[vec2]] to <8 x float> + // F64: [[vec1_32:%.*]] = fptrunc <8 x double> [[vec1]] to <8 x float> + + // CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[vec2_32]], [[vec1_32]] + // CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]] + res += step(thing1, thing2); + + // CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec1_32]], @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]]) ; Exp(value) + res += exp(thing1); + + // CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec1_32]]) ; Log(value) + // CHECK: fmul fast <8 x float> [[tmp]], @dx.op.unary.v8f32(i32 20, <8 x float> [[vec1_32]]) ; Htan(value) + res += tanh(thing1); + // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec1_32]]) ; Atan(value) + res += atan(thing1); + + return res; +} + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +vector dostuff(vector thing1, vector thing2, vector thing3) { + vector res = 0; + + // F64: [[unvec_64:%.*]] = fpext <8 x float> [[unvec]] to <8 x double> + // CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]]) ; FMin(a,b) + res += min(thing1, thing2); + + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec2]], [[TYPE]] [[vec3]]) ; FMax(a,b) + res += max(thing1, thing3); + + // CHECK: [[tmp:%.*]] = call <8 x double> @dx.op.binary.v8f64(i32 35, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]]) ; FMax(a,b) + // CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[tmp]], <8 x double> [[vec3_64]]) ; FMin(a,b) + res += clamp(thing1, thing2, thing3); + + // CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c) + res += 
(vector)fma((vector)thing1, (vector)(thing2), (vector)thing3); + + // F32: [[unvec_32:%.*]] = fptrunc <8 x double> [[unvec]] to <8 x float> + // CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[unvec_32]], [[vec2_32]] + // CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]] + res += step(thing1, thing2); + + // CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec2_32]], @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]]) ; Exp(value) + res += exp(thing1); + + // CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec2_32]]) ; Log(value) + // CHECK: fmul fast <8 x float> [[tmp]], @dx.op.unary.v8f32(i32 20, <8 x float> [[vec2_32]]) ; Htan(value) + res += tanh(thing1); + // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec2_32]]) ; Atan(value) + res += atan(thing1); + + return res; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl index 33086852ab..5443ada0c9 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix-ast.hlsl @@ -15,6 +15,7 @@ // ext_vector array. // CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class matrix definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLMatrixAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element [row_count] __attribute__((ext_vector_type(col_count)))' diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl index 0ad236a4b2..12859b7eda 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/vector/vector-ast.hlsl @@ -12,6 +12,7 @@ // Verify the class, final attribute and ext_vector field decl. 
// CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit class vector definition // CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +// CHECK-NEXT: HLSLVectorAttr {{0x[0-9a-fA-F]+}} <> Implicit // CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'element __attribute__((ext_vector_type(element_count)))' // Verify operator overloads for const vector subscript operators. diff --git a/tools/clang/test/SemaHLSL/const-default.hlsl b/tools/clang/test/SemaHLSL/const-default.hlsl index 2ebb6fe52e..6b5e43e0e9 100644 --- a/tools/clang/test/SemaHLSL/const-default.hlsl +++ b/tools/clang/test/SemaHLSL/const-default.hlsl @@ -33,7 +33,11 @@ class MyClass { ConstantBuffer g_const_buffer2; TextureBuffer g_texture_buffer2; +// expected-note@+2 {{forward declaration of 'FWDDeclStruct'}} +// expected-note@+1 {{forward declaration of 'FWDDeclStruct'}} struct FWDDeclStruct; +// expected-note@+2 {{forward declaration of 'FWDDeclClass'}} +// expected-note@+1 {{forward declaration of 'FWDDeclClass'}} class FWDDeclClass; // Ensure forward declared struct/class fails as expected diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl new file mode 100644 index 0000000000..1625454360 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls-hs.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -DTYPE=float -DNUM=7 -T hs_6_9 -verify %s + +struct HsConstantData { + float Edges[3] : SV_TessFactor; + vector vec; +}; + +struct LongVec { + float4 f; + vector vec; +}; + +HsConstantData PatchConstantFunction( // expected-error{{vectors of over 4 elements in patch constant function return type are not supported}} + vector vec : V, // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} + LongVec lv : L) { // expected-error{{vectors of over 4 elements in patch constant function parameters are not supported}} + return (HsConstantData)0; +} 
+ +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl new file mode 100644 index 0000000000..0604feeaec --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -0,0 +1,200 @@ +// RUN: %dxc -T ps_6_9 -DTYPE=LongVec -DNUM=5 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=LongVecSub -DNUM=128 -verify %s +// RUN: %dxc -T ps_6_9 -DNUM=1024 -verify %s + +// Add tests for base types and instantiated template classes with longvecs +// Size of the vector shouldn't matter, but using a few different ones just in case. + +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#ifndef TYPE +#define TYPE LongVecTpl +#endif + +struct LongVec { + float4 f; + vector vec; +}; + +struct LongVecSub : LongVec { + int3 is; +}; + +template +struct LongVecTpl { + float4 f; + vector vec; +}; + +vector global_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +vector global_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +TYPE global_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + +cbuffer BadBuffy { + vector cb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector cb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE cb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +}; + +tbuffer BadTuffy { + 
vector tb_vec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + vector tb_vec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} + TYPE tb_vec_rec_arr[10]; // expected-error{{vectors of over 4 elements in cbuffers or tbuffers are not supported}} +}; + +ConstantBuffer< TYPE > const_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} +TextureBuffer< TYPE > tex_buf; // expected-error{{vectors of over 4 elements in ConstantBuffers or TextureBuffers are not supported}} + +[shader("pixel")] +vector main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + vector vec : V) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + return vec; +} + +[shader("vertex")] +TYPE vs_main( // expected-error{{vectors of over 4 elements in entry function return type are not supported}} + TYPE parm : P) : SV_Target { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + parm.f = 0; + return parm; +} + + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line TYPE e, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout PointStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + inout LineStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line TYPE a, // expected-error{{vectors of over 4 elements in 
entry function parameters are not supported}} + inout TriangleStream OutputStream0) {} // expected-error{{vectors of over 4 elements in geometry streams are not supported}} + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +void patch_const(InputPatch inpatch, // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + OutputPatch outpatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{vectors of over 4 elements in tessellation patches are not supported}} + +RaytracingAccelerationStructure RTAS; + +struct [raypayload] DXRLongVec { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRLongVecSub : DXRLongVec { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRLongVecTpl { + float4 f : write(closesthit) : read(caller); + vector vec : write(closesthit) : read(caller); +}; + +#define RTTYPE PASTE(DXR,TYPE) + +[shader("raygeneration")] +void raygen() { + RTTYPE p = (RTTYPE)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + + +[shader("closesthit")] +void closesthit(inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + 
TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("anyhit")] +void AnyHit( inout RTTYPE payload, // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + in RTTYPE attribs ) // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} +{ +} + +[shader("miss")] +void Miss(inout RTTYPE payload){ // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + CallShader(0, payload); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + RTTYPE attr = (RTTYPE)0; + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +[shader("callable")] +void callable1(inout RTTYPE p) { // expected-error{{vectors of over 4 elements in entry function parameters are not supported}} + CallShader(0, p); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +groupshared LongVec as_pld; + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + DispatchMesh(1,1,1,as_pld); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} +} + +struct NodeLongVec { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + +struct NodeLongVecSub : NodeLongVec { + int3 is; +}; + +template +struct NodeLongVecTpl { + uint3 grid : SV_DispatchGrid; + vector vec; +}; + 
+#define NTYPE PASTE(Node,TYPE) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +void broadcast(DispatchNodeInputRecord input, // expected-error{{vectors of over 4 elements in node records are not supported}} + NodeOutput output) // expected-error{{vectors of over 4 elements in node records are not supported}} +{ + ThreadNodeOutputRecords touts; // expected-error{{vectors of over 4 elements in node records are not supported}} + GroupNodeOutputRecords gouts; // expected-error{{vectors of over 4 elements in node records are not supported}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{vectors of over 4 elements in node records are not supported}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl new file mode 100644 index 0000000000..472a192677 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-swizzle.hlsl @@ -0,0 +1,7 @@ +// RUN: %dxc -Tlib_6_9 -verify %s + +export +vector doit(vector vec5) { + vec5.x = 1; // expected-error {{Invalid swizzle 'x' on vector of over 4 elements.}} + return vec5.xyw; // expected-error {{Invalid swizzle 'xyw' on vector of over 4 elements.}} +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl new file mode 100644 index 0000000000..42eb6b077c --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvecs-sm68.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc -T ps_6_8 -verify %s + +#define TYPE float +#define NUM 5 + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 
4 inclusive}} +}; +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + +export vector lv_param_passthru( // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector vec1) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector arr[10]; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + arr[1]= vec1; + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vector tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 4 inclusive}} + vec1 = vec2; + vec2 = tmp; +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl new file mode 100644 index 0000000000..c1da348695 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/toolong-vectors.hlsl @@ -0,0 +1,116 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=float -DNUM=1025 -verify %s +// RUN: %dxc -T ps_6_9 -DTYPE=float -DNUM=1025 -verify %s + +// A test to verify that declarations of longvecs are permitted in all the accepted places. 
+// Only tests for acceptance, most codegen is ignored for now. + +struct LongVec { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +template +struct LongVecTpl2 { + float4 f; + vector vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +}; + +groupshared vector gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared vector gs_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +groupshared LongVecTpl gs_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl<1025>' requested here}} + +static vector static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static vector static_vec_arr[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +static LongVecTpl2 static_vec_tpl; // expected-note{{in instantiation of template class 'LongVecTpl2<1025>' requested here}} + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_passthru(vector vec1) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_param_in_out(in vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + out vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec2 = vec1; +} + +export void lv_param_inout(inout vector vec1, // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + inout vector vec2) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector 
tmp = vec1; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vec1 = vec2; + vec2 = tmp; +} + +export void lv_global_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + static_vec = vec; +} + +export vector lv_global_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = static_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export void lv_gs_assign(vector vec) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + gs_vec = vec; +} + +export vector lv_gs_ret() { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = gs_vec; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +#define DIMS 10 + +export vector // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} +lv_param_arr_passthru(vector vec)[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = vec; + return ret; +} + +export void lv_global_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + static_vec_arr[i] = vec[i]; +} + +export vector lv_global_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = static_vec_arr[i]; + return ret; +} + +export void lv_gs_arr_assign(vector vec[10]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + gs_vec_arr[i] = vec[i]; +} + +export vector 
lv_gs_arr_ret()[10] { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret[10]; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + for (int i = 0; i < DIMS; i++) + ret[i] = gs_vec_arr[i]; + return ret; +} + +export LongVec lv_param_rec_passthru(LongVec vec) { + LongVec ret = vec; + return ret; +} + +export vector lv_splat(TYPE scalar) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = scalar; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + +export vector lv_array_cast(TYPE arr[NUM]) { // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + vector ret = (vector)arr; // expected-error{{invalid value, valid range is between 1 and 1024 inclusive}} + return ret; +} + diff --git a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl index 40e0452719..05ec268a0c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/workgraph/dependent_type_for_node_object_template_arg.hlsl @@ -60,12 +60,9 @@ void woo() { } template -// expected-note@+1{{zero sized record defined here}} struct ForwardDecl; // expected-note{{template is declared here}} void woot() { - // Forward decl fails because forcing completion to check empty size for node object. 
- // expected-error@+1{{record used in GroupNodeInputRecords may not have zero size}} GroupNodeInputRecords > data; // expected-error{{implicit instantiation of undefined template 'ForwardDecl'}} foo(data); } diff --git a/tools/clang/test/SemaHLSL/incomplete-type.hlsl b/tools/clang/test/SemaHLSL/incomplete-type.hlsl index 8869b80400..b0d4f1da7f 100644 --- a/tools/clang/test/SemaHLSL/incomplete-type.hlsl +++ b/tools/clang/test/SemaHLSL/incomplete-type.hlsl @@ -1,17 +1,83 @@ -// RUN: %dxc -Tlib_6_3 -Wno-unused-value -verify %s +// RUN: %dxc -Tlib_6_8 -Wno-unused-value -verify %s // Tests that the compiler is well-behaved with regard to uses of incomplete types. // Regression test for GitHub #2058, which crashed in this case. -// expected-note@+4 {{forward declaration of 'S'}} -// expected-note@+3 {{forward declaration of 'S'}} -// expected-note@+2 {{forward declaration of 'S'}} -// expected-note@+1 {{forward declaration of 'S'}} -struct S; + +struct S; // expected-note 24 {{forward declaration of 'S'}} +template struct T; // expected-note 4 {{template is declared here}} + ConstantBuffer CB; // expected-error {{variable has incomplete type 'S'}} +ConstantBuffer > TB; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +S s; // expected-error {{variable has incomplete type 'S'}} +T<1> t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} + +cbuffer BadBuffy { + S cb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> cb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + +tbuffer BadTuffy { + S tb_s; // expected-error {{variable has incomplete type 'S'}} + T<1> tb_t; // expected-error {{implicit instantiation of undefined template 'T<1>'}} +}; + S func( // expected-error {{incomplete result type 'S' in function definition}} S param) // expected-error {{variable has incomplete type 'S'}} { S local; // expected-error {{variable has incomplete type 'S'}} return (S)0; // 
expected-error {{'S' is an incomplete type}} } + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point(line S e, // expected-error {{variable has incomplete type 'S'}} + inout PointStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout LineStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line(line S a, // expected-error {{variable has incomplete type 'S'}} + inout TriangleStream OutputStream0) {} // expected-error {{variable has incomplete type 'S'}} + + +[shader("domain")] +[domain("tri")] +void ds_main(OutputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +void patch_const(InputPatch inpatch, // expected-error{{variable has incomplete type 'S'}} + OutputPatch outpatch) {} // expected-error{{variable has incomplete type 'S'}} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} // expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8,1,1)] +// expected-error@+1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} +void broadcast(DispatchNodeInputRecord input, // expected-error{{variable has incomplete type 'S'}} + NodeOutput output) // expected-error{{variable has incomplete type 'S'}} +{ + ThreadNodeOutputRecords touts; // expected-error{{variable has incomplete type 'S'}} + GroupNodeOutputRecords gouts; // expected-error{{variable has incomplete type 'S'}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} // 
expected-error{{variable has incomplete type 'S'}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} // expected-error{{variable has incomplete type 'S'}} diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index c1c844d4be..11effb645b 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -1440,6 +1440,13 @@ class DxcCompiler : public IDxcCompiler3, Opts.EnablePayloadQualifiers; compiler.getLangOpts().HLSLProfile = compiler.getCodeGenOpts().HLSLProfile = Opts.TargetProfile; + const ShaderModel *SM = hlsl::ShaderModel::GetByName( + compiler.getLangOpts().HLSLProfile.c_str()); + if (SM->IsSM69Plus()) + compiler.getLangOpts().MaxHLSLVectorLength = DXIL::kSM69MaxVectorLength; + else + compiler.getLangOpts().MaxHLSLVectorLength = + DXIL::kDefaultMaxVectorLength; // Enable dumping implicit top level decls either if it was specifically // requested or if we are not dumping the ast from the command line. 
That diff --git a/tools/clang/unittests/HLSL/LinkerTest.cpp b/tools/clang/unittests/HLSL/LinkerTest.cpp index 7cafa0db06..df8bb644e1 100644 --- a/tools/clang/unittests/HLSL/LinkerTest.cpp +++ b/tools/clang/unittests/HLSL/LinkerTest.cpp @@ -526,6 +526,11 @@ TEST_F(LinkerTest, RunLinkMatArrayParam) { Link(L"main", L"ps_6_0", pLinker, {libName, libName2}, {"alloca [24 x float]", "getelementptr [12 x float], [12 x float]*"}, {}); + + Link(L"main", L"ps_6_9", pLinker, {libName, libName2}, + {"alloca [2 x <12 x float>]", + "getelementptr [12 x float], [12 x float]*"}, + {}); } TEST_F(LinkerTest, RunLinkMatParam) { diff --git a/utils/hct/CMakeLists.txt b/utils/hct/CMakeLists.txt new file mode 100644 index 0000000000..41e6b494e6 --- /dev/null +++ b/utils/hct/CMakeLists.txt @@ -0,0 +1,3 @@ +# generate hlsl_intrinsic_opcodes.json to preserve high level intrinsic opcodes +# This uses CODE_TAG because the file exists in the source tree. +add_hlsl_hctgen(HlslIntrinsicOpcodes OUTPUT hlsl_intrinsic_opcodes.json CODE_TAG) diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 66376c3b9b..66b897945e 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -37,6 +37,32 @@ "array_local_ldst", ] +# These are the valid overload type characters for DXIL instructions. +# - "v" is for void, and is mutually exclusive with the other types. +# - "u" is for user defined type (UDT), and is mutually exclusive with the other +# types. +# - "o" is for an HLSL object type (e.g. Texture, Sampler, etc.), and is +# mutually exclusive with the other types. +# - "<" is for vector overloads, and may be followed by a set of supported +# component types. +# - If "<" is not followed by any component types, any preceding scalar types +# are used. +# - Vector component types are captured into a separate list during +# processing. +# - "x" is for extended overloads, in order to support multiple overload +# dimensions, and is mutually exclusive with the other types. 
+# - "x" is not supplied manually, but automatically used when processing +# overloads that use "," to separate multiple overload dimensions, which are +# captured into a separate list. +# - "," is used to separate multiple overload dimensions, will be converted +# to a use of a single "x" overload string during processing. +dxil_all_user_oload_chars = "vhfd18wiluo<," +dxil_scalar_oload_chars = "hfd18wil" + +# Maximum number of overload dimensions supported through the extended overload +# in DXIL instructions. +dxil_max_overload_dims = 2 + class db_dxil_enum_value(object): "A representation for a value in an enumeration type" @@ -81,6 +107,9 @@ def __init__(self, name, **kwargs): self.ops = [] # the operands that this instruction takes self.is_allowed = True # whether this instruction is allowed in a DXIL program self.oload_types = "" # overload types if applicable + # Always call process_oload_types() after setting oload_types. + self.extended_oload_types = None # extended overload types if applicable + self.vector_oload_types = None # vector overload types if applicable self.fn_attr = "" # attribute shorthands: rn=does not access memory,ro=only reads from memory, self.is_deriv = False # whether this is some kind of derivative self.is_gradient = False # whether this requires a gradient calculation @@ -98,6 +127,8 @@ def __init__(self, name, **kwargs): self.is_reserved = self.dxil_class == "Reserved" self.shader_model_translated = () # minimum shader model required with translation by linker self.props = {} # extra properties + if self.is_dxil_op: + self.process_oload_types() def __str__(self): return self.name @@ -105,6 +136,125 @@ def __str__(self): def fully_qualified_name(self): return "{}::{}".format(self.fully_qualified_name_prefix, self.name) + def process_oload_types(self): + if type(self.oload_types) is not str: + raise ValueError( + f"overload for '{self.name}' should be a string - use empty if n/a" + ) + # Early out for LLVM instructions + if not 
self.is_dxil_op: + return + + self.extended_oload_types = [""] * dxil_max_overload_dims + self.vector_oload_types = [""] * dxil_max_overload_dims + + # Early out for void overloads. + if self.oload_types == "v": + return + + if self.oload_types == "": + raise ValueError( + f"overload for '{self.name}' should not be empty - use void if n/a" + ) + if "v" in self.oload_types: + raise ValueError( + f"void overload should be exclusive to other types for '({self.name})'" + ) + + # Process oload_types for extended and vector overloads. + # Contrived example: "hf<, dxil_max_overload_dims: + raise ValueError( + "Too many overload dimensions for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + for n, oloads in enumerate(oload_types): + if len(oloads) == 0: + raise ValueError( + f"Invalid extended overload type syntax for DXIL op " + f"{self.name}: '{self.oload_types}'" + ) + # split at vector for component overloads, if vector specified + # without following components, use the scalar overloads that + # precede the vector character. + split = oloads.split("<") + if len(split) == 1: + # No vector overload. + continue + elif len(split) != 2: + raise ValueError( + f"Invalid overload types for DXIL op {self.name}: " + f"{self.oload_types}" + ) + + # Split into scalar and vector component overloads. 
+ scalars, vector_oloads = split + if not vector_oloads: + vector_oloads = scalars + if not vector_oloads: + raise ValueError( + "No scalar overload types provided with vector overload " + f"for DXIL op {self.name}: '{self.oload_types}'" + ) + for c in scalars: + if c not in dxil_scalar_oload_chars: + raise ValueError( + "Invalid overload type character used with vector for " + f"DXIL op {self.name}: {c} in '{self.oload_types}'" + ) + oload_types[n] = scalars + "<" + self.vector_oload_types[n] = vector_oloads + if len(oload_types) > 1: + self.oload_types = "x" + self.extended_oload_types[: len(oload_types)] = oload_types + self.check_extended_oload_ops() + else: + self.oload_types = oload_types[0] + + def check_extended_oload_ops(self): + "Ensure ops has sequential extended overload references with $x0, $x1, etc." + next_oload_idx = 0 + for i in self.ops: + if i.llvm_type.startswith("$x"): + if i.llvm_type != "$x" + str(next_oload_idx): + raise ValueError( + "Extended overloads are not sequentially referenced in " + f"DXIL op {self.name}: {i.llvm_type} != $x{next_oload_idx}" + ) + next_oload_idx += 1 + if next_oload_idx != len(self.extended_oload_types): + raise ValueError( + "Extended overloads are not referenced for all overload " + f"dimensions in DXIL op {self.name}: {next_oload_idx} != " + f"{len(self.extended_oload_types)}" + ) + class db_dxil_metadata(object): "A representation for a metadata record" @@ -477,9 +627,7 @@ def populate_categories_and_models(self): "closesthit", ) for i in "GeometryIndex".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( "Raytracing object space uint System Values, raytracing tier 1.1" ) self.name_idx[i].shader_model = 6, 5 @@ -574,9 +722,7 @@ def populate_categories_and_models(self): self.name_idx[i].shader_model = 6, 3 self.name_idx[i].shader_stages = ("library", "intersection") for i in "CreateHandleForLib".split(","): - self.name_idx[ - i - ].category = ( + self.name_idx[i].category = ( 
"Library create handle from resource struct (like HL intrinsic)" ) self.name_idx[i].shader_model = 6, 3 @@ -1175,6 +1321,37 @@ def populate_llvm_instructions(self): self.add_llvm_instr( "OTHER", 53, "VAArg", "VAArgInst", "vaarg instruction", "", [] ) + + self.add_llvm_instr( + "OTHER", + 54, + "ExtractElement", + "ExtractElementInst", + "extracts from vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 55, + "InsertElement", + "InsertElementInst", + "inserts into vector", + "", + [], + ) + + self.add_llvm_instr( + "OTHER", + 56, + "ShuffleVector", + "ShuffleVectorInst", + "Shuffle two vectors", + "", + [], + ) + self.add_llvm_instr( "OTHER", 57, @@ -1348,7 +1525,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "returns the " + i, - "hf", + "hf<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1412,7 +1589,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "hfd", + "hfd<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1430,7 +1607,7 @@ def UFI(name, **mappings): next_op_idx, "Binary", "returns the " + i + " of the input values", - "wil", + "wil<", "rn", [ db_dxil_param(0, "$o", "", "operation result"), @@ -1502,7 +1679,7 @@ def UFI(name, **mappings): next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", - "d", + "d<", "rn", [ db_dxil_param( @@ -5555,6 +5732,83 @@ def UFI(name, **mappings): # Reserved block C next_op_idx = self.reserve_dxil_op_range("ReservedC", next_op_idx, 10) + # Long Vectors + self.add_dxil_op( + "RawBufferVectorLoad", + next_op_idx, + "RawBufferVectorLoad", + "reads from a raw buffer and structured buffer", + "hfwidl<", + "ro", + [ + db_dxil_param(0, "$r", "", "the loaded value"), + db_dxil_param(2, "res", "srv", "handle of TypedBuffer SRV to sample"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + 
"elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param( + 5, + "i32", + "alignment", + "relative load access alignment", + is_const=True, + ), + ], + counters=("tex_load",), + ) + next_op_idx += 1 + + self.add_dxil_op( + "RawBufferVectorStore", + next_op_idx, + "RawBufferVectorStore", + "writes to a RWByteAddressBuffer or RWStructuredBuffer", + "hfwidl<", + "", + [ + db_dxil_param(0, "v", "", ""), + db_dxil_param(2, "res", "uav", "handle of UAV to store to"), + db_dxil_param( + 3, + "i32", + "index", + "element index for StructuredBuffer, or byte offset for ByteAddressBuffer", + ), + db_dxil_param( + 4, + "i32", + "elementOffset", + "offset into element for StructuredBuffer, or undef for ByteAddressBuffer", + ), + db_dxil_param(5, "$o", "value0", "value"), + db_dxil_param( + 6, + "i32", + "alignment", + "relative store access alignment", + is_const=True, + ), + ], + counters=("tex_store",), + ) + next_op_idx += 1 + + # End of DXIL 1.9 opcodes. + self.set_op_count_for_version(1, 9, next_op_idx) + assert next_op_idx == 305, ( + "260 is expected next operation index but encountered %d and thus opcodes are broken" + % next_op_idx + ) + # Set interesting properties. self.build_indices() for ( @@ -5576,18 +5830,6 @@ def UFI(name, **mappings): ) for i in self.instr: self.verify_dense(i.ops, lambda x: x.pos, lambda x: i.name) - for i in self.instr: - if i.is_dxil_op: - assert i.oload_types != "", ( - "overload for DXIL operation %s should not be empty - use void if n/a" - % (i.name) - ) - assert i.oload_types == "v" or i.oload_types.find("v") < 0, ( - "void overload should be exclusive to other types (%s)" % i.name - ) - assert ( - type(i.oload_types) is str - ), "overload for %s should be a string - use empty if n/a" % (i.name) # Verify that all operations in each class have the same signature. 
+ import itertools @@ -8247,6 +8489,8 @@ def __init__( self.vulkanSpecific = ns.startswith( "Vk" ) # Vulkan specific intrinsic - SPIRV change + self.opcode = None # high-level opcode assigned later + self.unsigned_opcode = None # unsigned high-level opcode if applicable class db_hlsl_namespace(object): @@ -8292,7 +8536,7 @@ def __init__( class db_hlsl(object): "A database of HLSL language data" - def __init__(self, intrinsic_defs): + def __init__(self, intrinsic_defs, opcode_data): self.base_types = { "bool": "LICOMPTYPE_BOOL", "int": "LICOMPTYPE_INT", @@ -8365,6 +8609,13 @@ def __init__(self, intrinsic_defs): self.populate_attributes() self.opcode_namespace = "hlsl::IntrinsicOp" + # Populate opcode data for HLSL intrinsics. + self.opcode_data = opcode_data + # If opcode data is empty, create the default structure. + if not self.opcode_data: + self.opcode_data["IntrinsicOpCodes"] = {"Num_Intrinsics": 0} + self.assign_opcodes() + def create_namespaces(self): last_ns = None self.namespaces = {} @@ -8869,6 +9120,29 @@ def add_attr_arg(title_name, scope, args, doc): ) self.attributes = attributes + # Iterate through all intrinsics, assigning opcodes to each one. + # This uses the opcode_data to preserve already-assigned opcodes. + def assign_opcodes(self): + "Assign opcodes to the intrinsics."
+ IntrinsicOpDict = self.opcode_data["IntrinsicOpCodes"] + Num_Intrinsics = self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] + + def add_intrinsic(name): + nonlocal Num_Intrinsics + opcode = IntrinsicOpDict.setdefault(name, Num_Intrinsics) + if opcode == Num_Intrinsics: + Num_Intrinsics += 1 + return opcode + + sorted_intrinsics = sorted(self.intrinsics, key=lambda x: x.key) + for i in sorted_intrinsics: + i.opcode = add_intrinsic(i.enum_name) + for i in sorted_intrinsics: + if i.unsigned_op == "": + continue + i.unsigned_opcode = add_intrinsic(i.unsigned_op) + self.opcode_data["IntrinsicOpCodes"]["Num_Intrinsics"] = Num_Intrinsics + if __name__ == "__main__": db = db_dxil() diff --git a/utils/hct/hctdb_instrhelp.py b/utils/hct/hctdb_instrhelp.py index 17eefd4918..cf81dd04fc 100644 --- a/utils/hct/hctdb_instrhelp.py +++ b/utils/hct/hctdb_instrhelp.py @@ -18,6 +18,29 @@ def get_db_dxil(): return g_db_dxil +# opcode data contains fixed opcode assignments for HLSL intrinsics. +g_hlsl_opcode_data = None + + +def get_hlsl_opcode_data(): + global g_hlsl_opcode_data + if g_hlsl_opcode_data is None: + # Load the intrinsic opcodes from the JSON file. 
+ json_filepath = os.path.join( + os.path.dirname(__file__), "hlsl_intrinsic_opcodes.json" + ) + try: + with open(json_filepath, "r") as file: + g_hlsl_opcode_data = json.load(file) + except FileNotFoundError: + print(f"File not found: {json_filepath}") + except json.JSONDecodeError as e: + print(f"Error decoding JSON from {json_filepath}: {e}") + if not g_hlsl_opcode_data: + g_hlsl_opcode_data = {} + return g_hlsl_opcode_data + + g_db_hlsl = None @@ -26,10 +49,15 @@ def get_db_hlsl(): if g_db_hlsl is None: thisdir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(thisdir, "gen_intrin_main.txt"), "r") as f: - g_db_hlsl = db_hlsl(f) + g_db_hlsl = db_hlsl(f, get_hlsl_opcode_data()) return g_db_hlsl +def get_max_oload_dims(): + db = get_db_dxil() + return f"const unsigned kDxilMaxOloadDims = {dxil_max_overload_dims};" + + def format_comment(prefix, val): "Formats a value with a line-comment prefix." result = "" @@ -487,25 +515,23 @@ def print_opfunc_props(self): ) ) print( - "// OpCode OpCode name, OpCodeClass OpCodeClass name, void, h, f, d, i1, i8, i16, i32, i64, udt, obj, function attribute" + "// OpCode OpCode name, OpCodeClass OpCodeClass name, void, h, f, d, i1, i8, i16, i32, i64, udt, obj, vec, ext, function attribute, ext oload, vec oload" ) # Example formatted string: - # { OC::TempRegLoad, "TempRegLoad", OCC::TempRegLoad, "tempRegLoad", false, true, true, false, true, false, true, true, false, Attribute::ReadOnly, }, - # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 - # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 + # { OC::TempRegLoad, "TempRegLoad", OCC::TempRegLoad, "tempRegLoad", false, true, true, false, true, false, true, true, false, false, false, false, false, Attribute::ReadOnly, {}, {0x00} }, + # 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 + # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 last_category = None - # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong, u(dt) + # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong, u(dt), o(bj), vec(t)or, e(x)tended f = lambda i, c: "true" if i.oload_types.find(c) >= 0 else "false" lower_exceptions = { "CBufferLoad": "cbufferLoad", "CBufferLoadLegacy": "cbufferLoadLegacy", "GSInstanceID": "gsInstanceID", } - lower_fn = ( - lambda t: lower_exceptions[t] - if t in lower_exceptions - else t[:1].lower() + t[1:] + lower_fn = lambda t: ( + lower_exceptions[t] if t in lower_exceptions else t[:1].lower() + t[1:] ) attr_dict = { "": "None", @@ -516,19 +542,37 @@ def print_opfunc_props(self): "nr": "NoReturn", "wv": "None", } - attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + "," + attr_fn = lambda i: "Attribute::" + attr_dict[i.fn_attr] + oload_to_mask = lambda oload: sum( + [1 << dxil_all_user_oload_chars.find(c) for c in oload] + ) + ext_oload_fn = ( + lambda i: "{" + + ",".join(["{0x%x}" % oload_to_mask(o) for o in i.extended_oload_types]) + + "}" + ) + vec_oload_fn = ( + lambda i: "{" + + ",".join(["{0x%x}" % oload_to_mask(o) for o in i.vector_oload_types]) + + "}" + ) for i in self.instrs: if last_category != i.category: if last_category != None: print("") print( - " // {category:118} void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute".format( + " // {category:118} void, h, f, d, i1, i8, i16, i32, i64, udt, obj, vec, function attribute, ext oload, vec oload".format( category=i.category ) ) last_category = i.category print( - " {{ {OC}::{name:24} {quotName:27} 
{OCC}::{className:25} {classNameQuot:28} {{{v:>6},{h:>6},{f:>6},{d:>6},{b:>6},{e:>6},{w:>6},{i:>6},{l:>6},{u:>6},{o:>6}}}, {attr:20} }},".format( + ( + " {{ {OC}::{name:24} {quotName:27} {OCC}::{className:25} " + + "{classNameQuot:28} {{{v:>6},{h:>6},{f:>6},{d:>6},{b:>6}," + + "{e:>6},{w:>6},{i:>6},{l:>6},{u:>6},{o:>6},{t:>6}," + + "{x:>6}}}, {attr:20}, {ext_oload:2}, {vec_oload:6} }}," + ).format( name=i.name + ",", quotName='"' + i.name + '",', className=i.dxil_class + ",", @@ -544,9 +588,13 @@ def print_opfunc_props(self): l=f(i, "l"), u=f(i, "u"), o=f(i, "o"), + t=f(i, "<"), + x=f(i, "x"), attr=attr_fn(i), OC=self.OC, OCC=self.OCC, + ext_oload=ext_oload_fn(i), + vec_oload=vec_oload_fn(i), ) ) print("};") @@ -599,6 +647,9 @@ def print_opfunc_table(self): "noderecordhandle": "A(pNodeRecordHandle);", "nodeproperty": "A(nodeProperty);", "noderecordproperty": "A(nodeRecordProperty);", + # Extended overload slots, extend as needed: + "$x0": "EXT(0);", + "$x1": "EXT(1);", } last_category = None for i in self.instrs: @@ -629,12 +680,15 @@ def print_opfunc_oload_type(self): obj_ty = "obj" vec_ty = "$vec" gsptr_ty = "$gsptr" + extended_ty = "$x" last_category = None index_dict = collections.OrderedDict() ptr_index_dict = collections.OrderedDict() single_dict = collections.OrderedDict() + extended_dict = collections.OrderedDict() struct_list = [] + extended_list = [] for instr in self.instrs: ret_ty = instr.ops[0].llvm_type @@ -654,6 +708,10 @@ def print_opfunc_oload_type(self): struct_list.append(instr.name) continue + if instr.oload_types == "x": + extended_list.append(instr) + continue + in_param_ty = False # Try to find elt_ty in parameter types. for index, op in enumerate(instr.ops): @@ -708,8 +766,7 @@ def print_opfunc_oload_type(self): "i": "IntegerType::get(Ctx, 32)", "l": "IntegerType::get(Ctx, 64)", "v": "Type::getVoidTy(Ctx)", - "u": "Type::getInt32PtrTy(Ctx)", - "o": "Type::getInt32PtrTy(Ctx)", + # No other types should be referenced here. 
} assert ty in type_code_texts, "llvm type %s is unknown" % (ty) ty_code = type_code_texts[ty] @@ -769,6 +826,61 @@ def print_opfunc_oload_type(self): line = line + "}" print(line) + for instr in extended_list: + # Collect indices for overloaded return and types, make a tuple of + # indices the key, and add the opcode to a list of opcodes for that + # key. Indices start with 0 for return type, and 1 for the first + # function parameter, which is the DXIL OpCode. + indices = [] + for index, op in enumerate(instr.ops): + # Skip dxil opcode. + if op.pos == 1: + continue + + op_type = op.llvm_type + if op_type.startswith(extended_ty): + try: + extended_index = int(op_type[2:]) + except: + raise ValueError( + "Error parsing extended operand type " + + f"'{op_type}' for DXIL op '{instr.name}'" + ) + if extended_index != len(indices): + raise ValueError( + f"'$x{extended_index}' is not in sequential " + + f"order for DXIL op '{instr.name}'" + ) + indices.append(op.pos) + + if len(indices) != len(instr.extended_oload_types): + raise ValueError( + f"DXIL op {instr.name}: extended overload count " + + "mismatches the number of overload types" + ) + extended_dict.setdefault(tuple(indices), []).append(instr.name) + + def GetTypeAtIndex(index): + if index == 0: + return "FT->getReturnType()" + return f"FT->getParamType({index - 1})" + + for index_tuple, opcodes in extended_dict.items(): + line = "" + for opcode in opcodes: + line = line + f"case OpCode::{opcode}:\n" + if index_tuple[-1] > 0: + line += ( + f" if (FT->getNumParams() < {index_tuple[-1]})\n" + + " return nullptr;\n" + ) + line += ( + " return llvm::StructType::get(Ctx, {" + + ", ".join([GetTypeAtIndex(index) for index in index_tuple]) + + "});\n" + ) + print(line) + class db_valfns_gen: "A generator of validation functions." 
@@ -1045,22 +1157,22 @@ def wrap_with_ifdef_if_vulkan_specific(intrinsic, text): def enum_hlsl_intrinsics(): db = get_db_hlsl() result = "" - enumed = [] + enumed = set() for i in sorted(db.intrinsics, key=lambda x: x.key): if i.enum_name not in enumed: - enumerant = " %s,\n" % (i.enum_name) - result += wrap_with_ifdef_if_vulkan_specific(i, enumerant) # SPIRV Change - enumed.append(i.enum_name) + result += " %s = %d,\n" % (i.enum_name, i.opcode) + enumed.add(i.enum_name) # unsigned result += " // unsigned\n" for i in sorted(db.intrinsics, key=lambda x: x.key): if i.unsigned_op != "": if i.unsigned_op not in enumed: - result += " %s,\n" % (i.unsigned_op) - enumed.append(i.unsigned_op) + result += " %s = %d,\n" % (i.unsigned_op, i.unsigned_opcode) + enumed.add(i.unsigned_op) - result += " Num_Intrinsics,\n" + Num_Intrinsics = get_hlsl_opcode_data()["IntrinsicOpCodes"]["Num_Intrinsics"] + result += " Num_Intrinsics = %d,\n" % (Num_Intrinsics) return result @@ -1570,6 +1682,7 @@ def get_highest_released_shader_model(): ) return result + def get_highest_shader_model(): result = """static const unsigned kHighestMajor = %d; static const unsigned kHighestMinor = %d;""" % ( @@ -1578,6 +1691,7 @@ def get_highest_shader_model(): ) return result + def get_dxil_version_minor(): return "const unsigned kDxilMinor = %d;" % highest_minor diff --git a/utils/hct/hctgen.py b/utils/hct/hctgen.py index dbb7e3a745..1421fbfad5 100755 --- a/utils/hct/hctgen.py +++ b/utils/hct/hctgen.py @@ -2,6 +2,7 @@ import argparse from hctdb_instrhelp import * from hctdb import * +import json import sys import os import CodeTags @@ -28,6 +29,7 @@ "DxilCounters", "DxilMetadata", "RDAT_LibraryTypes", + "HlslIntrinsicOpcodes", ], ) parser.add_argument("--output", required=True) @@ -232,6 +234,14 @@ def writeDxilPIXPasses(args): return 0 +def writeHlslIntrinsicOpcodes(args): + out = openOutput(args) + # get_db_hlsl() initializes the hlsl intrinsic database and opcode_data. 
+ get_db_hlsl() + json.dump(get_hlsl_opcode_data(), out, indent=2) + out.write("\n") + return 0 + args = parser.parse_args() if args.force_lf and args.force_crlf: eprint("--force-lf and --force-crlf are mutually exclusive, only pass one") diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json new file mode 100644 index 0000000000..48a0b74c17 --- /dev/null +++ b/utils/hct/hlsl_intrinsic_opcodes.json @@ -0,0 +1,363 @@ +{ + "IntrinsicOpCodes": { + "Num_Intrinsics": 358, + "IOP_AcceptHitAndEndSearch": 0, + "IOP_AddUint64": 1, + "IOP_AllMemoryBarrier": 2, + "IOP_AllMemoryBarrierWithGroupSync": 3, + "IOP_AllocateRayQuery": 4, + "IOP_Barrier": 5, + "IOP_CallShader": 6, + "IOP_CheckAccessFullyMapped": 7, + "IOP_CreateResourceFromHeap": 8, + "IOP_D3DCOLORtoUBYTE4": 9, + "IOP_DeviceMemoryBarrier": 10, + "IOP_DeviceMemoryBarrierWithGroupSync": 11, + "IOP_DispatchMesh": 12, + "IOP_DispatchRaysDimensions": 13, + "IOP_DispatchRaysIndex": 14, + "IOP_EvaluateAttributeAtSample": 15, + "IOP_EvaluateAttributeCentroid": 16, + "IOP_EvaluateAttributeSnapped": 17, + "IOP_GeometryIndex": 18, + "IOP_GetAttributeAtVertex": 19, + "IOP_GetRemainingRecursionLevels": 20, + "IOP_GetRenderTargetSampleCount": 21, + "IOP_GetRenderTargetSamplePosition": 22, + "IOP_GroupMemoryBarrier": 23, + "IOP_GroupMemoryBarrierWithGroupSync": 24, + "IOP_HitKind": 25, + "IOP_IgnoreHit": 26, + "IOP_InstanceID": 27, + "IOP_InstanceIndex": 28, + "IOP_InterlockedAdd": 29, + "IOP_InterlockedAnd": 30, + "IOP_InterlockedCompareExchange": 31, + "IOP_InterlockedCompareExchangeFloatBitwise": 32, + "IOP_InterlockedCompareStore": 33, + "IOP_InterlockedCompareStoreFloatBitwise": 34, + "IOP_InterlockedExchange": 35, + "IOP_InterlockedMax": 36, + "IOP_InterlockedMin": 37, + "IOP_InterlockedOr": 38, + "IOP_InterlockedXor": 39, + "IOP_IsHelperLane": 40, + "IOP_NonUniformResourceIndex": 41, + "IOP_ObjectRayDirection": 42, + "IOP_ObjectRayOrigin": 43, + "IOP_ObjectToWorld": 44, + 
"IOP_ObjectToWorld3x4": 45, + "IOP_ObjectToWorld4x3": 46, + "IOP_PrimitiveIndex": 47, + "IOP_Process2DQuadTessFactorsAvg": 48, + "IOP_Process2DQuadTessFactorsMax": 49, + "IOP_Process2DQuadTessFactorsMin": 50, + "IOP_ProcessIsolineTessFactors": 51, + "IOP_ProcessQuadTessFactorsAvg": 52, + "IOP_ProcessQuadTessFactorsMax": 53, + "IOP_ProcessQuadTessFactorsMin": 54, + "IOP_ProcessTriTessFactorsAvg": 55, + "IOP_ProcessTriTessFactorsMax": 56, + "IOP_ProcessTriTessFactorsMin": 57, + "IOP_QuadAll": 58, + "IOP_QuadAny": 59, + "IOP_QuadReadAcrossDiagonal": 60, + "IOP_QuadReadAcrossX": 61, + "IOP_QuadReadAcrossY": 62, + "IOP_QuadReadLaneAt": 63, + "IOP_RayFlags": 64, + "IOP_RayTCurrent": 65, + "IOP_RayTMin": 66, + "IOP_ReportHit": 67, + "IOP_SetMeshOutputCounts": 68, + "IOP_TraceRay": 69, + "IOP_WaveActiveAllEqual": 70, + "IOP_WaveActiveAllTrue": 71, + "IOP_WaveActiveAnyTrue": 72, + "IOP_WaveActiveBallot": 73, + "IOP_WaveActiveBitAnd": 74, + "IOP_WaveActiveBitOr": 75, + "IOP_WaveActiveBitXor": 76, + "IOP_WaveActiveCountBits": 77, + "IOP_WaveActiveMax": 78, + "IOP_WaveActiveMin": 79, + "IOP_WaveActiveProduct": 80, + "IOP_WaveActiveSum": 81, + "IOP_WaveGetLaneCount": 82, + "IOP_WaveGetLaneIndex": 83, + "IOP_WaveIsFirstLane": 84, + "IOP_WaveMatch": 85, + "IOP_WaveMultiPrefixBitAnd": 86, + "IOP_WaveMultiPrefixBitOr": 87, + "IOP_WaveMultiPrefixBitXor": 88, + "IOP_WaveMultiPrefixCountBits": 89, + "IOP_WaveMultiPrefixProduct": 90, + "IOP_WaveMultiPrefixSum": 91, + "IOP_WavePrefixCountBits": 92, + "IOP_WavePrefixProduct": 93, + "IOP_WavePrefixSum": 94, + "IOP_WaveReadLaneAt": 95, + "IOP_WaveReadLaneFirst": 96, + "IOP_WorldRayDirection": 97, + "IOP_WorldRayOrigin": 98, + "IOP_WorldToObject": 99, + "IOP_WorldToObject3x4": 100, + "IOP_WorldToObject4x3": 101, + "IOP_abort": 102, + "IOP_abs": 103, + "IOP_acos": 104, + "IOP_all": 105, + "IOP_and": 106, + "IOP_any": 107, + "IOP_asdouble": 108, + "IOP_asfloat": 109, + "IOP_asfloat16": 110, + "IOP_asin": 111, + "IOP_asint": 112, + 
"IOP_asint16": 113, + "IOP_asuint": 114, + "IOP_asuint16": 115, + "IOP_atan": 116, + "IOP_atan2": 117, + "IOP_ceil": 118, + "IOP_clamp": 119, + "IOP_clip": 120, + "IOP_cos": 121, + "IOP_cosh": 122, + "IOP_countbits": 123, + "IOP_cross": 124, + "IOP_ddx": 125, + "IOP_ddx_coarse": 126, + "IOP_ddx_fine": 127, + "IOP_ddy": 128, + "IOP_ddy_coarse": 129, + "IOP_ddy_fine": 130, + "IOP_degrees": 131, + "IOP_determinant": 132, + "IOP_distance": 133, + "IOP_dot": 134, + "IOP_dot2add": 135, + "IOP_dot4add_i8packed": 136, + "IOP_dot4add_u8packed": 137, + "IOP_dst": 138, + "IOP_exp": 139, + "IOP_exp2": 140, + "IOP_f16tof32": 141, + "IOP_f32tof16": 142, + "IOP_faceforward": 143, + "IOP_firstbithigh": 144, + "IOP_firstbitlow": 145, + "IOP_floor": 146, + "IOP_fma": 147, + "IOP_fmod": 148, + "IOP_frac": 149, + "IOP_frexp": 150, + "IOP_fwidth": 151, + "IOP_isfinite": 152, + "IOP_isinf": 153, + "IOP_isnan": 154, + "IOP_ldexp": 155, + "IOP_length": 156, + "IOP_lerp": 157, + "IOP_lit": 158, + "IOP_log": 159, + "IOP_log10": 160, + "IOP_log2": 161, + "IOP_mad": 162, + "IOP_max": 163, + "IOP_min": 164, + "IOP_modf": 165, + "IOP_msad4": 166, + "IOP_mul": 167, + "IOP_normalize": 168, + "IOP_or": 169, + "IOP_pack_clamp_s8": 170, + "IOP_pack_clamp_u8": 171, + "IOP_pack_s8": 172, + "IOP_pack_u8": 173, + "IOP_pow": 174, + "IOP_printf": 175, + "IOP_radians": 176, + "IOP_rcp": 177, + "IOP_reflect": 178, + "IOP_refract": 179, + "IOP_reversebits": 180, + "IOP_round": 181, + "IOP_rsqrt": 182, + "IOP_saturate": 183, + "IOP_select": 184, + "IOP_sign": 185, + "IOP_sin": 186, + "IOP_sincos": 187, + "IOP_sinh": 188, + "IOP_smoothstep": 189, + "IOP_source_mark": 190, + "IOP_sqrt": 191, + "IOP_step": 192, + "IOP_tan": 193, + "IOP_tanh": 194, + "IOP_tex1D": 195, + "IOP_tex1Dbias": 196, + "IOP_tex1Dgrad": 197, + "IOP_tex1Dlod": 198, + "IOP_tex1Dproj": 199, + "IOP_tex2D": 200, + "IOP_tex2Dbias": 201, + "IOP_tex2Dgrad": 202, + "IOP_tex2Dlod": 203, + "IOP_tex2Dproj": 204, + "IOP_tex3D": 205, + "IOP_tex3Dbias": 
206, + "IOP_tex3Dgrad": 207, + "IOP_tex3Dlod": 208, + "IOP_tex3Dproj": 209, + "IOP_texCUBE": 210, + "IOP_texCUBEbias": 211, + "IOP_texCUBEgrad": 212, + "IOP_texCUBElod": 213, + "IOP_texCUBEproj": 214, + "IOP_transpose": 215, + "IOP_trunc": 216, + "IOP_unpack_s8s16": 217, + "IOP_unpack_s8s32": 218, + "IOP_unpack_u8u16": 219, + "IOP_unpack_u8u32": 220, + "IOP_VkRawBufferLoad": 221, + "IOP_VkRawBufferStore": 222, + "IOP_VkReadClock": 223, + "IOP_Vkext_execution_mode": 224, + "IOP_Vkext_execution_mode_id": 225, + "MOP_Append": 226, + "MOP_RestartStrip": 227, + "MOP_CalculateLevelOfDetail": 228, + "MOP_CalculateLevelOfDetailUnclamped": 229, + "MOP_GetDimensions": 230, + "MOP_Load": 231, + "MOP_Sample": 232, + "MOP_SampleBias": 233, + "MOP_SampleCmp": 234, + "MOP_SampleCmpBias": 235, + "MOP_SampleCmpGrad": 236, + "MOP_SampleCmpLevel": 237, + "MOP_SampleCmpLevelZero": 238, + "MOP_SampleGrad": 239, + "MOP_SampleLevel": 240, + "MOP_Gather": 241, + "MOP_GatherAlpha": 242, + "MOP_GatherBlue": 243, + "MOP_GatherCmp": 244, + "MOP_GatherCmpAlpha": 245, + "MOP_GatherCmpBlue": 246, + "MOP_GatherCmpGreen": 247, + "MOP_GatherCmpRed": 248, + "MOP_GatherGreen": 249, + "MOP_GatherRaw": 250, + "MOP_GatherRed": 251, + "MOP_GetSamplePosition": 252, + "MOP_Load2": 253, + "MOP_Load3": 254, + "MOP_Load4": 255, + "MOP_InterlockedAdd": 256, + "MOP_InterlockedAdd64": 257, + "MOP_InterlockedAnd": 258, + "MOP_InterlockedAnd64": 259, + "MOP_InterlockedCompareExchange": 260, + "MOP_InterlockedCompareExchange64": 261, + "MOP_InterlockedCompareExchangeFloatBitwise": 262, + "MOP_InterlockedCompareStore": 263, + "MOP_InterlockedCompareStore64": 264, + "MOP_InterlockedCompareStoreFloatBitwise": 265, + "MOP_InterlockedExchange": 266, + "MOP_InterlockedExchange64": 267, + "MOP_InterlockedExchangeFloat": 268, + "MOP_InterlockedMax": 269, + "MOP_InterlockedMax64": 270, + "MOP_InterlockedMin": 271, + "MOP_InterlockedMin64": 272, + "MOP_InterlockedOr": 273, + "MOP_InterlockedOr64": 274, + 
"MOP_InterlockedXor": 275, + "MOP_InterlockedXor64": 276, + "MOP_Store": 277, + "MOP_Store2": 278, + "MOP_Store3": 279, + "MOP_Store4": 280, + "MOP_DecrementCounter": 281, + "MOP_IncrementCounter": 282, + "MOP_Consume": 283, + "MOP_WriteSamplerFeedback": 284, + "MOP_WriteSamplerFeedbackBias": 285, + "MOP_WriteSamplerFeedbackGrad": 286, + "MOP_WriteSamplerFeedbackLevel": 287, + "MOP_Abort": 288, + "MOP_CandidateGeometryIndex": 289, + "MOP_CandidateInstanceContributionToHitGroupIndex": 290, + "MOP_CandidateInstanceID": 291, + "MOP_CandidateInstanceIndex": 292, + "MOP_CandidateObjectRayDirection": 293, + "MOP_CandidateObjectRayOrigin": 294, + "MOP_CandidateObjectToWorld3x4": 295, + "MOP_CandidateObjectToWorld4x3": 296, + "MOP_CandidatePrimitiveIndex": 297, + "MOP_CandidateProceduralPrimitiveNonOpaque": 298, + "MOP_CandidateTriangleBarycentrics": 299, + "MOP_CandidateTriangleFrontFace": 300, + "MOP_CandidateTriangleRayT": 301, + "MOP_CandidateType": 302, + "MOP_CandidateWorldToObject3x4": 303, + "MOP_CandidateWorldToObject4x3": 304, + "MOP_CommitNonOpaqueTriangleHit": 305, + "MOP_CommitProceduralPrimitiveHit": 306, + "MOP_CommittedGeometryIndex": 307, + "MOP_CommittedInstanceContributionToHitGroupIndex": 308, + "MOP_CommittedInstanceID": 309, + "MOP_CommittedInstanceIndex": 310, + "MOP_CommittedObjectRayDirection": 311, + "MOP_CommittedObjectRayOrigin": 312, + "MOP_CommittedObjectToWorld3x4": 313, + "MOP_CommittedObjectToWorld4x3": 314, + "MOP_CommittedPrimitiveIndex": 315, + "MOP_CommittedRayT": 316, + "MOP_CommittedStatus": 317, + "MOP_CommittedTriangleBarycentrics": 318, + "MOP_CommittedTriangleFrontFace": 319, + "MOP_CommittedWorldToObject3x4": 320, + "MOP_CommittedWorldToObject4x3": 321, + "MOP_Proceed": 322, + "MOP_RayFlags": 323, + "MOP_RayTMin": 324, + "MOP_TraceRayInline": 325, + "MOP_WorldRayDirection": 326, + "MOP_WorldRayOrigin": 327, + "MOP_Count": 328, + "MOP_FinishedCrossGroupSharing": 329, + "MOP_GetGroupNodeOutputRecords": 330, + 
"MOP_GetThreadNodeOutputRecords": 331, + "MOP_IsValid": 332, + "MOP_GroupIncrementOutputCount": 333, + "MOP_ThreadIncrementOutputCount": 334, + "MOP_OutputComplete": 335, + "MOP_SubpassLoad": 336, + "IOP_InterlockedUMax": 337, + "IOP_InterlockedUMin": 338, + "IOP_WaveActiveUMax": 339, + "IOP_WaveActiveUMin": 340, + "IOP_WaveActiveUProduct": 341, + "IOP_WaveActiveUSum": 342, + "IOP_WaveMultiPrefixUProduct": 343, + "IOP_WaveMultiPrefixUSum": 344, + "IOP_WavePrefixUProduct": 345, + "IOP_WavePrefixUSum": 346, + "IOP_uabs": 347, + "IOP_uclamp": 348, + "IOP_udot": 349, + "IOP_ufirstbithigh": 350, + "IOP_umad": 351, + "IOP_umax": 352, + "IOP_umin": 353, + "IOP_umul": 354, + "IOP_usign": 355, + "MOP_InterlockedUMax": 356, + "MOP_InterlockedUMin": 357 + } +}