Revise overload resolution for splats/truncations #7114

Open · wants to merge 2 commits into base: main
9 changes: 6 additions & 3 deletions include/dxc/dxcapi.internal.h
@@ -44,10 +44,13 @@ enum LEGAL_INTRINSIC_TEMPLATES {
LITEMPLATE_MATRIX = 3, // Matrix types (eg. float3x3).
LITEMPLATE_ANY =
4, // Any one of scalar, vector or matrix types (but not object).
LITEMPLATE_OBJECT = 5, // Object types.
LITEMPLATE_ARRAY = 6, // Scalar array.
LITEMPLATE_OBJECT = 5, // Object types.
LITEMPLATE_ARRAY = 6, // Scalar array.
LITEMPLATE_SCALAR_ONLY = 7, // Uncastable scalar types.
LITEMPLATE_VECTOR_ONLY = 8, // Uncastable vector types (eg. float3).
LITEMPLATE_MATRIX_ONLY = 9, // Uncastable matrix types (eg. float3x3).
Member Author:
Castability here refers to the ability to truncate or splat something to change its "shape" (also called its template or layout; we use a few terms), not the castability of its contents.
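To make that distinction concrete, here is a minimal standalone sketch (hypothetical types, not dxc code) of the two "shape" casts in question: a splat replicates a scalar into every lane, and a truncation keeps the leading lane and drops the rest; the element type is untouched in both cases.

```cpp
#include <array>

using Float3 = std::array<float, 3>;

// Splat: scalar -> vector; every lane receives the same value.
Float3 splat(float s) { return {s, s, s}; }

// Truncation: vector -> scalar; keep lane 0 and drop the rest.
float truncateToScalar(const Float3 &v) { return v[0]; }
```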


LITEMPLATE_COUNT = 7
LITEMPLATE_COUNT = 10
};

// INTRIN_COMPTYPE_FROM_TYPE_ELT0 is for object method intrinsics to indicate
35 changes: 5 additions & 30 deletions lib/HLSL/HLOperationLower.cpp
@@ -4196,12 +4196,10 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
loadArgs.emplace_back(opArg); // opcode
loadArgs.emplace_back(helper.handle); // resource handle

// offsets
if (opcode == OP::OpCode::TextureLoad) {
// set mip level
loadArgs.emplace_back(helper.mipLevel);
}

if (opcode == OP::OpCode::TextureLoad) {
// texture coord
unsigned coordSize = DxilResource::GetNumCoords(RK);
bool isVectorAddr = helper.addr->getType()->isVectorTy();
@@ -4213,22 +4211,6 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
} else
loadArgs.emplace_back(undefI);
}
} else {
if (helper.addr->getType()->isVectorTy()) {
Value *scalarOffset =
Builder.CreateExtractElement(helper.addr, (uint64_t)0);

// TODO: calculate the real address based on opcode

loadArgs.emplace_back(scalarOffset); // offset
} else {
// TODO: calculate the real address based on opcode

loadArgs.emplace_back(helper.addr); // offset
}
}
// offset 0
if (opcode == OP::OpCode::TextureLoad) {
Member Author:
This simplification came naturally with the removal of the extract operation here. The opcode is determined completely by the resource kind, so a test for one is equivalent to a test for the other.
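A tiny standalone sketch of the equivalence being relied on; the kind and opcode names below are an illustrative reduction, not the full dxc mapping.

```cpp
// Illustrative only: a reduced resource-kind/opcode mapping, not the full dxc one.
enum class Kind { Texture2D, TypedBuffer, RawBuffer };
enum class Op { TextureLoad, BufferLoad };

// The load opcode is computed purely from the resource kind...
Op opcodeFor(Kind k) {
  return (k == Kind::Texture2D) ? Op::TextureLoad : Op::BufferLoad;
}

// ...so a guard on the opcode and a guard on the kind select the same branch:
// opcodeFor(k) == Op::TextureLoad exactly when k is a texture kind.
```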

if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
unsigned offsetSize = DxilResource::GetNumOffsets(RK);
for (unsigned i = 0; i < 3; i++) {
@@ -4242,11 +4224,9 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
loadArgs.emplace_back(undefI);
loadArgs.emplace_back(undefI);
}
}

// Offset 1
if (RK == DxilResource::Kind::TypedBuffer) {
loadArgs.emplace_back(undefI);
} else {
loadArgs.emplace_back(helper.addr); // c0
loadArgs.emplace_back(undefI); // c1
}

Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
Expand Down Expand Up @@ -4420,12 +4400,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
if (RK == DxilResource::Kind::RawBuffer ||
RK == DxilResource::Kind::TypedBuffer) {
// Offset 0
if (offset->getType()->isVectorTy()) {
Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
storeArgs.emplace_back(scalarOffset); // offset
} else {
storeArgs.emplace_back(offset); // offset
}
storeArgs.emplace_back(offset); // offset
Member Author:
One of several explicit "casts", in the form of a vector-to-scalar truncation, that are now done in the default code paths instead of relying on special cases here.


// Store offset0 for later use
offset0Idx = storeArgs.size() - 1;
61 changes: 50 additions & 11 deletions tools/clang/lib/Sema/SemaHLSL.cpp
@@ -662,6 +662,7 @@ enum ArTypeObjectKind {
// indexer object used to implement .mips[1].
AR_TOBJ_STRING, // Represents a string
AR_TOBJ_DEPENDENT, // Dependent type for template.
AR_TOBJ_NOCAST, // Parameter should not have layout casts (splat,trunc)
};

enum TYPE_CONVERSION_FLAGS {
@@ -989,9 +990,18 @@ static const ArTypeObjectKind g_NullTT[] = {AR_TOBJ_VOID, AR_TOBJ_UNKNOWN};

static const ArTypeObjectKind g_ArrayTT[] = {AR_TOBJ_ARRAY, AR_TOBJ_UNKNOWN};

static const ArTypeObjectKind g_ScalarOnlyTT[] = {
AR_TOBJ_SCALAR, AR_TOBJ_NOCAST, AR_TOBJ_UNKNOWN};

static const ArTypeObjectKind g_VectorOnlyTT[] = {
AR_TOBJ_VECTOR, AR_TOBJ_NOCAST, AR_TOBJ_UNKNOWN};

static const ArTypeObjectKind g_MatrixOnlyTT[] = {
AR_TOBJ_MATRIX, AR_TOBJ_NOCAST, AR_TOBJ_UNKNOWN};

const ArTypeObjectKind *g_LegalIntrinsicTemplates[] = {
g_NullTT, g_ScalarTT, g_VectorTT, g_MatrixTT,
g_AnyTT, g_ObjectTT, g_ArrayTT,
g_NullTT, g_ScalarTT, g_VectorTT, g_MatrixTT, g_AnyTT,
g_ObjectTT, g_ArrayTT, g_ScalarOnlyTT, g_VectorOnlyTT, g_MatrixOnlyTT,
Member Author:
Automatically generated code for intrinsics assigns indexes into this array. Entries that use the *_ONLY variants point at lists containing a NOCAST entry, which stops the iteration through possibilities in a way that makes it clear the match failed because a cast was required where none is allowed.
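A small sketch of the property the sentinel encodes (my own helper, not part of this change, assuming the ArTypeObjectKind enum and template lists declared above): whether a legal-template list permits shape casts can be read directly off the list.

```cpp
// Hypothetical helper: a list containing the AR_TOBJ_NOCAST sentinel never
// admits splats or truncations; matching stops before any cast fallback runs.
bool templateAllowsShapeCasts(const ArTypeObjectKind *list) {
  for (; *list != AR_TOBJ_UNKNOWN; ++list)
    if (*list == AR_TOBJ_NOCAST)
      return false;
  return true;
}

// e.g. templateAllowsShapeCasts(g_VectorTT)     -> true
//      templateAllowsShapeCasts(g_VectorOnlyTT) -> false
```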

};
C_ASSERT(ARRAYSIZE(g_LegalIntrinsicTemplates) == LITEMPLATE_COUNT);

@@ -6113,7 +6123,7 @@ bool HLSLExternalSource::MatchArguments(
ArBasicKind
ComponentType[MaxIntrinsicArgs]; // Component type for each argument,
// AR_BASIC_UNKNOWN if unspecified.
UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UNUSED_INDEX32
UINT uSpecialSize[IA_SPECIAL_SLOTS]; // row/col matching types, UnusedSize
Member Author:
Incidental, just an incorrect and misleading comment.

// if unspecified.
badArgIdx = MaxIntrinsicArgs;

@@ -6249,12 +6259,15 @@ bool HLSLExternalSource::MatchArguments(
"otherwise intrinsic table was modified and g_MaxIntrinsicParamCount "
"was not updated (or uTemplateId is out of bounds)");

// Compare template
// Compare template to any type matching params requirements.
if ((AR_TOBJ_UNKNOWN == Template[pIntrinsicArg->uTemplateId]) ||
((AR_TOBJ_SCALAR == Template[pIntrinsicArg->uTemplateId]) &&
(AR_TOBJ_VECTOR == TypeInfoShapeKind ||
AR_TOBJ_MATRIX == TypeInfoShapeKind))) {
// Unrestricted or truncation of tuples to scalars are allowed
// Previous params gave no type restrictions
// or truncation of tuples to scalars are allowed
// Later steps harmonize common typed params and will always convert the
// earlier arg into a splat instead.
Template[pIntrinsicArg->uTemplateId] = TypeInfoShapeKind;
} else if (AR_TOBJ_SCALAR == TypeInfoShapeKind) {
if (AR_TOBJ_SCALAR != Template[pIntrinsicArg->uTemplateId] &&
Expand Down Expand Up @@ -6292,6 +6305,11 @@ bool HLSLExternalSource::MatchArguments(
}
}

// If the intrinsic parameter has variable rows or columns but must match
// other argument dimensions, it will be specified in pIntrinsicArg with
// a special value indicating that the dimension depends on passed values.
// uSpecialSize stores the dimensions of the actual passed type.
Member Author:
Here and below, I just try to better document what is actually happening here for my own and others' benefit.


// Rows
if (AR_TOBJ_SCALAR != TypeInfoShapeKind) {
if (pIntrinsicArg->uRows >= IA_SPECIAL_BASE) {
Expand Down Expand Up @@ -6398,18 +6416,39 @@ bool HLSLExternalSource::MatchArguments(
const ArTypeObjectKind *pTT =
g_LegalIntrinsicTemplates[pArgument->uLegalTemplates];
if (AR_TOBJ_UNKNOWN != Template[i]) {
if ((AR_TOBJ_SCALAR == Template[i]) &&
(AR_TOBJ_VECTOR == *pTT || AR_TOBJ_MATRIX == *pTT)) {
Template[i] = *pTT;
} else {
// See if a perfect match overload is available
while (AR_TOBJ_UNKNOWN != *pTT && AR_TOBJ_NOCAST != *pTT) {
if (Template[i] == *pTT)
break;
pTT++;
}

if (AR_TOBJ_UNKNOWN == *pTT) {
// Perfect match failed and casts are allowed.
// Try splats and truncations to get a match.
pTT = g_LegalIntrinsicTemplates[pArgument->uLegalTemplates];
while (AR_TOBJ_UNKNOWN != *pTT) {
if (Template[i] == *pTT)
if (AR_TOBJ_SCALAR == Template[i] &&
(AR_TOBJ_VECTOR == *pTT || AR_TOBJ_MATRIX == *pTT)) {
// If a scalar was passed in and the expected value was
// matrix/vector convert to the template type for a splat.
// Only applicable to VectorTT and MatrixTT,
// since the vec/mtx has to be first in the list.
Template[i] = *pTT;
break;
} else if (AR_TOBJ_VECTOR == Template[i] && AR_TOBJ_SCALAR == *pTT) {
// If a vector was passed in and the expected value was scalar
// convert to the template type for a truncation.
// Only applicable to ScalarTT,
// since the scalar has to be first in the list.
Template[i] = AR_TOBJ_SCALAR;
break;
}
Member Author:
This is the crux of the change. Instead of checking for splats first, we now check for perfect matches first. If that fails because we run out of options, we try splats AND truncations (the latter, in spite of a comment suggesting otherwise, were not previously allowed here). However, if the list ends in NOCAST, neither a splat nor a truncation is allowed and the match fails.

The reordering of the splat check won't directly change any previous behavior, since it only kicked in when a single template type was allowed in the first place; it was necessary for the new NOCAST behavior, though.
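For readers skimming the diff, here is a compact standalone sketch of the resolution order described above (simplified shapes and my own helper name, not the actual MatchArguments code).

```cpp
enum Shape { Unknown, NoCast, Scalar, Vector, Matrix };

// Resolve one argument slot against an Unknown-terminated list of legal shapes.
bool matchShape(Shape arg, const Shape *legal, Shape &resolved) {
  const Shape *t = legal;
  // Pass 1: look for a perfect shape match, stopping early at the NoCast
  // sentinel if the list has one.
  while (*t != Unknown && *t != NoCast) {
    if (arg == *t) { resolved = arg; return true; }
    ++t;
  }
  // Pass 2: shape casts. We only get here with *t == Unknown, i.e. when the
  // list has no NoCast sentinel; NOCAST lists fall straight through to failure.
  if (*t == Unknown) {
    for (t = legal; *t != Unknown; ++t) {
      if (arg == Scalar && (*t == Vector || *t == Matrix)) { // splat
        resolved = *t;
        return true;
      }
      if (arg == Vector && *t == Scalar) { // truncation
        resolved = Scalar;
        return true;
      }
    }
  }
  return false; // no exact match, and casts were unavailable or forbidden
}
```

Note that in this ordering a matrix argument never hits either cast branch, which is the behavior the reviewer question below asks about.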

Collaborator:
I'm guessing truncation of matrices isn't allowed? I'd expect that to take place in the else-if condition. Should there be some diagnostic if we encounter a matrix in this case?

pTT++;
}
}

if (AR_TOBJ_UNKNOWN == *pTT) {
if (AR_TOBJ_UNKNOWN == *pTT || AR_TOBJ_NOCAST == *pTT) {
Template[i] = g_LegalIntrinsicTemplates[pArgument->uLegalTemplates][0];
badArgIdx = std::min(badArgIdx, i);
}